Add optimized vpx_blend_mask6
This is to replace vp10/common/reconinter.c:build_masked_compound. Functionality is equivalent, but the interface is slightly more generic. Total encoder speedup with ext-inter: ~7.5% Change-Id: Iee18b83ae324ffc9c7f7dc16d4b2b06adb4d4305
This commit is contained in:
Родитель
7488ae014b
Коммит
a661bc87c4
|
@ -0,0 +1,278 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TEST_ASSERTION_HELPERS_H_
|
||||
#define TEST_ASSERTION_HELPERS_H_
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
namespace assertion_helpers {
|
||||
|
||||
// Arrays (1D) are element-wise equal
|
||||
template<typename E, size_t n>
|
||||
::testing::AssertionResult ArraysEq(const E (&a)[n],
|
||||
const E (&b)[n]) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
const E &va = a[i];
|
||||
const E &vb = b[i];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Arrays (1D) are element-wise equal
|
||||
// within the index interval [lo, hi)
|
||||
template<typename E, size_t n>
|
||||
::testing::AssertionResult ArraysEqWithin(const E (&a)[n],
|
||||
const E (&b)[n],
|
||||
const size_t lo,
|
||||
const size_t hi) {
|
||||
assert(hi > lo);
|
||||
assert(hi <= n);
|
||||
|
||||
for (size_t i = lo; i < hi; i++) {
|
||||
const E &va = a[i];
|
||||
const E &vb = b[i];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Arrays (1D) are element-wise equal
|
||||
// outside the index interval [lo, hi)
|
||||
template<typename E, size_t n>
|
||||
::testing::AssertionResult ArraysEqOutside(const E (&a)[n],
|
||||
const E (&b)[n],
|
||||
const size_t lo,
|
||||
const size_t hi) {
|
||||
assert(hi > lo);
|
||||
assert(hi <= n);
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
if (lo <= i && i < hi)
|
||||
continue;
|
||||
|
||||
const E &va = a[i];
|
||||
const E &vb = b[i];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Arrays (2D) are element-wise equal
|
||||
template<typename E, size_t n, size_t m>
|
||||
::testing::AssertionResult ArraysEq(const E (&a)[n][m],
|
||||
const E (&b)[n][m]) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
for (size_t j = 0; j < m; j++) {
|
||||
const E &va = a[i][j];
|
||||
const E &vb = b[i][j];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "][" << j << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Arrays (2D) are element-wise equal
|
||||
// within the index interval [lo0, hi0) x [lo1, hi1) (Cartesian product)
|
||||
template<typename E, size_t n, size_t m>
|
||||
::testing::AssertionResult ArraysEqWithin(const E (&a)[n][m],
|
||||
const E (&b)[n][m],
|
||||
const size_t lo0,
|
||||
const size_t hi0,
|
||||
const size_t lo1,
|
||||
const size_t hi1) {
|
||||
assert(hi0 > lo0);
|
||||
assert(hi0 <= n);
|
||||
assert(hi1 > lo1);
|
||||
assert(hi1 <= m);
|
||||
|
||||
for (size_t i = lo0; i < hi0; i++) {
|
||||
for (size_t j = lo1; j < hi1; j++) {
|
||||
const E &va = a[i][j];
|
||||
const E &vb = b[i][j];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "][" << j << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Arrays (2D) are element-wise equal
|
||||
// outside the index interval [lo0, hi0) x [lo1, hi1) (Cartesian product)
|
||||
template<typename E, size_t n, size_t m>
|
||||
::testing::AssertionResult ArraysEqOutside(const E (&a)[n][m],
|
||||
const E (&b)[n][m],
|
||||
const size_t lo0,
|
||||
const size_t hi0,
|
||||
const size_t lo1,
|
||||
const size_t hi1) {
|
||||
assert(hi0 > lo0);
|
||||
assert(hi0 <= n);
|
||||
assert(hi1 > lo1);
|
||||
assert(hi1 <= m);
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
if (lo0 <= i && i < hi0)
|
||||
continue;
|
||||
|
||||
for (size_t j = 0; j < m; j++) {
|
||||
if (lo1 <= j && j < hi1)
|
||||
continue;
|
||||
|
||||
const E &va = a[i][j];
|
||||
const E &vb = b[i][j];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << i << "][" << j << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Non contiguous 2D array buffers are element-wise equal
|
||||
// at corresponding linear indices specified by rows/cols/stride/offset
|
||||
template<typename E, size_t n, size_t m>
|
||||
::testing::AssertionResult BuffersEqWithin(const E (&a)[n][m],
|
||||
const E (&b)[n][m],
|
||||
const size_t stridea,
|
||||
const size_t strideb,
|
||||
const size_t offseta,
|
||||
const size_t offsetb,
|
||||
const size_t rows,
|
||||
const size_t cols) {
|
||||
assert(rows <= n);
|
||||
assert(cols <= m);
|
||||
assert(stridea <= m);
|
||||
assert(strideb <= m);
|
||||
assert(cols <= stridea);
|
||||
assert(cols <= strideb);
|
||||
assert(offseta < n * m);
|
||||
assert(offsetb < n * m);
|
||||
assert(offseta + (rows - 1) * stridea + (cols - 1) < n * m);
|
||||
assert(offsetb + (rows - 1) * strideb + (cols - 1) < n * m);
|
||||
|
||||
const E *pa = &a[0][0] + offseta;
|
||||
const E *pb = &b[0][0] + offsetb;
|
||||
|
||||
for (size_t r = 0 ; r < rows ; r++) {
|
||||
for (size_t c = 0 ; c < cols ; c++) {
|
||||
const E &va = pa[c];
|
||||
const E &vb = pb[c];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at linear index "
|
||||
<< "[" << pa - &a[0][0] << "] vs [" << pb - &b[0][0] << "]"
|
||||
<< " row=" << r << " col=" << c
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
}
|
||||
pa += stridea;
|
||||
pb += strideb;
|
||||
}
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
// Non contiguous 2D array buffers are element-wise equal
|
||||
// except at corresponding linear indices specified by
|
||||
// rows/cols/stride/offset.
|
||||
template<typename E, size_t n, size_t m>
|
||||
::testing::AssertionResult BuffersEqOutside(const E (&a)[n][m],
|
||||
const E (&b)[n][m],
|
||||
const size_t stride,
|
||||
const size_t offset,
|
||||
const size_t rows,
|
||||
const size_t cols ) {
|
||||
assert(rows <= n);
|
||||
assert(cols <= m);
|
||||
assert(stride <= m);
|
||||
assert(cols <= stride);
|
||||
assert(offset < n * m);
|
||||
assert(offset + (rows - 1) * stride + (cols - 1) < n * m);
|
||||
|
||||
const E *const pa = &a[0][0];
|
||||
const E *const pb = &b[0][0];
|
||||
|
||||
size_t idx = 0;
|
||||
size_t r = 0;
|
||||
size_t end = offset; // beginning of first row
|
||||
|
||||
while (idx < n * m) {
|
||||
while (idx < end) { // until beginning of row or end of buffer
|
||||
const E &va = pa[idx];
|
||||
const E &vb = pb[idx];
|
||||
if (va != vb) {
|
||||
return ::testing::AssertionFailure()
|
||||
<< "Arrays do not equal at index "
|
||||
<< "[" << idx / m << "][" << idx % m << "]"
|
||||
<< " values are: " << va << " vs " << vb;
|
||||
}
|
||||
|
||||
idx++;
|
||||
}
|
||||
|
||||
// Move past row end
|
||||
idx += cols;
|
||||
|
||||
if (++r < rows) {
|
||||
// Move to next row
|
||||
end += stride;
|
||||
} else {
|
||||
// Move to end of buffer
|
||||
end = n * m;
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
assert(idx == n * m + cols);
|
||||
|
||||
return ::testing::AssertionSuccess();
|
||||
}
|
||||
|
||||
} // namespace assertion_helpers
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_ASSERTION_HELPERS_H_
|
|
@ -0,0 +1,311 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
#include "test/function_equivalence_test.h"
|
||||
#include "test/randomise.h"
|
||||
#include "test/snapshot.h"
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#include "./vp10_rtcd.h"
|
||||
|
||||
#include "test/assertion_helpers.h"
|
||||
#include "vp10/common/enums.h"
|
||||
|
||||
using libvpx_test::assertion_helpers::BuffersEqWithin;
|
||||
using libvpx_test::assertion_helpers::BuffersEqOutside;
|
||||
using libvpx_test::assertion_helpers::ArraysEq;
|
||||
using libvpx_test::FunctionEquivalenceTest;
|
||||
using libvpx_test::Snapshot;
|
||||
using libvpx_test::Randomise;
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename F, typename T>
|
||||
class BlendMask6Test : public FunctionEquivalenceTest<F> {
|
||||
protected:
|
||||
virtual ~BlendMask6Test() {}
|
||||
|
||||
virtual void Execute(T *p_src0, T *p_src1) = 0;
|
||||
|
||||
void Common() {
|
||||
w = 1 << randomise.uniform<int>(2, MAX_SB_SIZE_LOG2 + 1);
|
||||
h = 1 << randomise.uniform<int>(2, MAX_SB_SIZE_LOG2 + 1);
|
||||
|
||||
randomise(subx);
|
||||
randomise(suby);
|
||||
|
||||
randomise(dst_offset, 0, 32);
|
||||
randomise(dst_stride, w, MAX_SB_SIZE * 5 + 1);
|
||||
|
||||
randomise(src0_offset, 0, 32);
|
||||
randomise(src0_stride, w, MAX_SB_SIZE * 5 + 1);
|
||||
|
||||
randomise(src1_offset, 0, 32);
|
||||
randomise(src1_stride, w, MAX_SB_SIZE * 5 + 1);
|
||||
|
||||
randomise(mask_stride, w * (subx ? 2: 1), 2 * MAX_SB_SIZE + 1);
|
||||
|
||||
T *p_src0;
|
||||
T *p_src1;
|
||||
|
||||
switch (randomise.uniform<int>(3)) {
|
||||
case 0: // Separate sources
|
||||
p_src0 = &src0[0][0];
|
||||
p_src1 = &src1[0][0];
|
||||
break;
|
||||
case 1: // src0 == dst
|
||||
p_src0 = &dst_tst[0][0];
|
||||
src0_stride = dst_stride;
|
||||
src0_offset = dst_offset;
|
||||
p_src1 = &src1[0][0];
|
||||
break;
|
||||
case 2: // src1 == dst
|
||||
p_src0 = &src0[0][0];
|
||||
p_src1 = &dst_tst[0][0];
|
||||
src1_stride = dst_stride;
|
||||
src1_offset = dst_offset;
|
||||
break;
|
||||
default:
|
||||
FAIL();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Prepare
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
snapshot(dst_ref);
|
||||
snapshot(dst_tst);
|
||||
|
||||
snapshot(src0);
|
||||
snapshot(src1);
|
||||
|
||||
snapshot(mask);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Execute
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Execute(p_src0, p_src1);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Check
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ASSERT_TRUE(BuffersEqWithin(dst_ref, dst_tst,
|
||||
dst_stride, dst_stride,
|
||||
dst_offset, dst_offset,
|
||||
h, w));
|
||||
|
||||
ASSERT_TRUE(ArraysEq(snapshot.get(src0), src0));
|
||||
ASSERT_TRUE(ArraysEq(snapshot.get(src1), src1));
|
||||
ASSERT_TRUE(ArraysEq(snapshot.get(mask), mask));
|
||||
|
||||
ASSERT_TRUE(BuffersEqOutside(snapshot.get(dst_ref), dst_ref,
|
||||
dst_stride,
|
||||
dst_offset,
|
||||
h, w));
|
||||
|
||||
ASSERT_TRUE(BuffersEqOutside(snapshot.get(dst_tst), dst_tst,
|
||||
dst_stride,
|
||||
dst_offset,
|
||||
h, w));
|
||||
}
|
||||
|
||||
Snapshot snapshot;
|
||||
Randomise randomise;
|
||||
|
||||
T dst_ref[MAX_SB_SIZE][MAX_SB_SIZE * 5];
|
||||
T dst_tst[MAX_SB_SIZE][MAX_SB_SIZE * 5];
|
||||
size_t dst_stride;
|
||||
size_t dst_offset;
|
||||
|
||||
T src0[MAX_SB_SIZE][MAX_SB_SIZE * 5];
|
||||
size_t src0_stride;
|
||||
size_t src0_offset;
|
||||
|
||||
T src1[MAX_SB_SIZE][MAX_SB_SIZE * 5];
|
||||
size_t src1_stride;
|
||||
size_t src1_offset;
|
||||
|
||||
uint8_t mask[2 * MAX_SB_SIZE][2 * MAX_SB_SIZE];
|
||||
size_t mask_stride;
|
||||
|
||||
int w;
|
||||
int h;
|
||||
|
||||
bool suby;
|
||||
bool subx;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// 8 bit version
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride,
|
||||
uint8_t *src0, uint32_t src0_stride,
|
||||
uint8_t *src1, uint32_t src1_stride,
|
||||
const uint8_t *mask, uint32_t mask_stride,
|
||||
int h, int w, int suby, int subx);
|
||||
|
||||
class BlendMask6Test8B : public BlendMask6Test<F8B, uint8_t> {
|
||||
protected:
|
||||
void Execute(uint8_t *p_src0, uint8_t *p_src1) {
|
||||
ref_func_(&dst_ref[0][dst_offset], dst_stride,
|
||||
p_src0 + src0_offset, src0_stride,
|
||||
p_src1 + src1_offset, src1_stride,
|
||||
&mask[0][0], sizeof(mask[0]),
|
||||
h, w, suby, subx);
|
||||
|
||||
ASM_REGISTER_STATE_CHECK(
|
||||
tst_func_(&dst_tst[0][dst_offset], dst_stride,
|
||||
p_src0 + src0_offset, src0_stride,
|
||||
p_src1 + src1_offset, src1_stride,
|
||||
&mask[0][0], sizeof(mask[0]),
|
||||
h, w, suby, subx));
|
||||
}
|
||||
};
|
||||
|
||||
// Runs up to 10000 rounds on fully random 8-bit pixels and mask values in
// [0, 65), stopping early as soon as a fatal failure has been recorded.
TEST_P(BlendMask6Test8B, RandomValues) {
  int round = 0;
  while (round < 10000 && !HasFatalFailure()) {
    // Fill both destinations, both sources and the mask with fresh values.
    randomise(dst_ref);
    randomise(dst_tst);

    randomise(src0);
    randomise(src1);

    randomise(mask, 65);

    Common();

    ++round;
  }
}
|
||||
|
||||
// Runs up to 1000 rounds with pixels restricted to [254, 256) and mask
// values restricted to [63, 65), i.e. the top of each range, stopping early
// as soon as a fatal failure has been recorded.
TEST_P(BlendMask6Test8B, ExtremeValues) {
  int round = 0;
  while (round < 1000 && !HasFatalFailure()) {
    // Fill both destinations, both sources and the mask with fresh values.
    randomise(dst_ref, 254, 256);
    randomise(dst_tst, 254, 256);

    randomise(src0, 254, 256);
    randomise(src1, 254, 256);

    randomise(mask, 63, 65);

    Common();

    ++round;
  }
}
|
||||
|
||||
#if HAVE_SSE4_1
// Pair the C implementation (reference) with the SSE4.1 implementation
// (tested) for the 8-bit fixture.
INSTANTIATE_TEST_CASE_P(
  SSE4_1_C_COMPARE, BlendMask6Test8B,
  ::testing::Values(
    std::tr1::make_tuple(&vpx_blend_mask6_c,
                         &vpx_blend_mask6_sse4_1)));
#endif  // HAVE_SSE4_1
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// High bit-depth version
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride,
|
||||
uint8_t *src0, uint32_t src0_stride,
|
||||
uint8_t *src1, uint32_t src1_stride,
|
||||
const uint8_t *mask, uint32_t mask_stride,
|
||||
int h, int w, int suby, int subx, int bd);
|
||||
|
||||
class BlendMask6TestHBD : public BlendMask6Test<FHBD, uint16_t> {
|
||||
protected:
|
||||
void Execute(uint16_t *p_src0, uint16_t *p_src1) {
|
||||
ref_func_(CONVERT_TO_BYTEPTR(&dst_ref[0][dst_offset]), dst_stride,
|
||||
CONVERT_TO_BYTEPTR(p_src0 + src0_offset), src0_stride,
|
||||
CONVERT_TO_BYTEPTR(p_src1 + src1_offset), src1_stride,
|
||||
&mask[0][0], sizeof(mask[0]),
|
||||
h, w, suby, subx, bit_depth);
|
||||
|
||||
ASM_REGISTER_STATE_CHECK(
|
||||
tst_func_(CONVERT_TO_BYTEPTR(&dst_tst[0][dst_offset]), dst_stride,
|
||||
CONVERT_TO_BYTEPTR(p_src0 + src0_offset), src0_stride,
|
||||
CONVERT_TO_BYTEPTR(p_src1 + src1_offset), src1_stride,
|
||||
&mask[0][0], sizeof(mask[0]),
|
||||
h, w, suby, subx, bit_depth));
|
||||
}
|
||||
|
||||
int bit_depth;
|
||||
};
|
||||
|
||||
// Runs up to 10000 rounds. Each round picks a bit depth of 8, 10 or 12,
// fills the pixel buffers with values in [0, 1 << bit_depth) and the mask
// with values in [0, 65). Stops early once a fatal failure is recorded.
TEST_P(BlendMask6TestHBD, RandomValues) {
  int round = 0;
  while (round < 10000 && !HasFatalFailure()) {
    bit_depth = randomise.choice(8, 10, 12);

    // Exclusive upper bound for pixel values at this bit depth.
    const int pixel_limit = 1 << bit_depth;

    randomise(dst_ref, pixel_limit);
    randomise(dst_tst, pixel_limit);

    randomise(src0, pixel_limit);
    randomise(src1, pixel_limit);

    randomise(mask, 65);

    Common();

    ++round;
  }
}
|
||||
|
||||
// Runs up to 1000 rounds. Each round picks a bit depth of 8, 10 or 12 and
// restricts pixels to the two largest values of that depth and the mask to
// [63, 65). Stops early once a fatal failure is recorded.
TEST_P(BlendMask6TestHBD, ExtremeValues) {
  int round = 0;
  while (round < 1000 && !HasFatalFailure()) {
    bit_depth = randomise.choice(8, 10, 12);

    // Pixels are drawn from [pixel_floor, pixel_limit).
    const int pixel_limit = 1 << bit_depth;
    const int pixel_floor = pixel_limit - 2;

    randomise(dst_ref, pixel_floor, pixel_limit);
    randomise(dst_tst, pixel_floor, pixel_limit);

    randomise(src0, pixel_floor, pixel_limit);
    randomise(src1, pixel_floor, pixel_limit);

    randomise(mask, 63, 65);

    Common();

    ++round;
  }
}
|
||||
|
||||
#if HAVE_SSE4_1
// Pair the C implementation (reference) with the SSE4.1 implementation
// (tested) for the high bit-depth fixture.
INSTANTIATE_TEST_CASE_P(
  SSE4_1_C_COMPARE, BlendMask6TestHBD,
  ::testing::Values(
    std::tr1::make_tuple(&vpx_highbd_blend_mask6_c,
                         &vpx_highbd_blend_mask6_sse4_1)));
#endif  // HAVE_SSE4_1
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
} // namespace
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_FUNCTION_EQUIVALENCE_TEST_H_
|
||||
#define TEST_FUNCTION_EQUIVALENCE_TEST_H_
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/util.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
template <typename T>
|
||||
class FunctionEquivalenceTest :
|
||||
public ::testing::TestWithParam< std::tr1::tuple< T, T > > {
|
||||
public:
|
||||
virtual ~FunctionEquivalenceTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
ref_func_ = std::tr1::get<0>(this->GetParam());
|
||||
tst_func_ = std::tr1::get<1>(this->GetParam());
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
}
|
||||
|
||||
protected:
|
||||
T ref_func_;
|
||||
T tst_func_;
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
#endif // TEST_FUNCTION_EQUIVALENCE_TEST_H_
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_RANDOMISE_H_
|
||||
#define TEST_RANDOMISE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "test/acm_random.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
// Compile-time check that T is an integer type according to
// std::numeric_limits. Use it as a statement and terminate it with a
// semicolon at the point of use: STATIC_ASSERT_INTEGER_TYPE_(T);
// The macro itself deliberately does not end in a semicolon, so that such a
// use does not expand to an extra empty statement.
// TODO(any): Replace this when built with C++11
#define STATIC_ASSERT_INTEGER_TYPE_(T) \
  GTEST_COMPILE_ASSERT_(std::numeric_limits<T>::is_integer, \
                        integer_type_required)
|
||||
|
||||
/**
|
||||
* Deterministic random number generator with various convenience methods.
|
||||
*/
|
||||
class Randomise {
|
||||
public:
|
||||
Randomise() {
|
||||
rnd_.Reset(ACMRandom::DeterministicSeed());
|
||||
}
|
||||
|
||||
virtual ~Randomise() { }
|
||||
|
||||
// Uniformly distributed random number from the range
|
||||
// [std::numeric_limits<R>::min(), and std::numeric_limits<R>::max()]
|
||||
template<typename R>
|
||||
R uniform() {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(R);
|
||||
}
|
||||
|
||||
// Uniformly distributed random number from the range
|
||||
// [0, hi)
|
||||
template<typename R, typename H>
|
||||
R uniform(H hi) {
|
||||
assert(hi > 0);
|
||||
R v = uniform<R>();
|
||||
if (std::numeric_limits<R>::is_signed && v < 0)
|
||||
return -v % hi;
|
||||
else
|
||||
return v % hi;
|
||||
}
|
||||
|
||||
// Uniformly distributed random number from the range
|
||||
// [lo, hi)
|
||||
template<typename R, typename L, typename H>
|
||||
R uniform(L lo, H hi) {
|
||||
assert(hi > lo);
|
||||
return uniform<R, H>(hi - lo) + lo;
|
||||
}
|
||||
|
||||
// Randomly pick and return one of the arguments
|
||||
template<typename T>
|
||||
T choice(T v0, T v1) {
|
||||
switch (uniform<int>(2)) {
|
||||
case 0: return v0;
|
||||
default: return v1;
|
||||
}
|
||||
}
|
||||
|
||||
// Randomly pick and return one of the arguments
|
||||
template<typename T>
|
||||
T choice(T v0, T v1, T v2) {
|
||||
switch (uniform<int>(3)) {
|
||||
case 0: return v0;
|
||||
case 1: return v1;
|
||||
default: return v2;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void operator()(T &e) { // NOLINT
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
e = uniform<T>();
|
||||
}
|
||||
|
||||
template<typename T, typename H>
|
||||
void operator()(T &e, H hi) { // NOLINT
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
e = uniform<T, H>(hi);
|
||||
}
|
||||
|
||||
template<typename T, typename L, typename H>
|
||||
void operator()(T &e, L lo, H hi) { // NOLINT
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
e = uniform<T, L, H>(lo, hi);
|
||||
}
|
||||
|
||||
template<typename T, size_t n>
|
||||
void operator()(T (&arr)[n]) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
arr[i] = uniform<T>();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, size_t n, typename H>
|
||||
void operator()(T (&arr)[n], H hi) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
arr[i] = uniform<T, H>(hi);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, size_t n, typename L, typename H>
|
||||
void operator()(T (&arr)[n], L lo, H hi) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
arr[i] = uniform<T, L, H>(lo, hi);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, size_t n, size_t m>
|
||||
void operator()(T (&arr)[n][m]) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
for (size_t j = 0; j < m ; j++) {
|
||||
arr[i][j] = uniform<T>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, size_t n, size_t m, typename H>
|
||||
void operator()(T (&arr)[n][m], H hi) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
for (size_t j = 0; j < m ; j++) {
|
||||
arr[i][j] = uniform<T, H>(hi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, size_t n, size_t m, typename L, typename H>
|
||||
void operator()(T (&arr)[n][m], L lo, H hi) {
|
||||
STATIC_ASSERT_INTEGER_TYPE_(T);
|
||||
for (size_t i = 0; i < n ; i++) {
|
||||
for (size_t j = 0; j < m ; j++) {
|
||||
arr[i][j] = uniform<T, L, H>(lo, hi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
libvpx_test::ACMRandom rnd_;
|
||||
};
|
||||
|
||||
// Add further specialisations as necessary
|
||||
|
||||
template<>
|
||||
bool Randomise::uniform<bool>() {
|
||||
return rnd_.Rand8() & 1 ? true : false;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint8_t Randomise::uniform<uint8_t>() {
|
||||
return rnd_.Rand8();
|
||||
}
|
||||
|
||||
template<>
|
||||
uint16_t Randomise::uniform<uint16_t>() {
|
||||
return rnd_.Rand16();
|
||||
}
|
||||
|
||||
template<>
|
||||
uint32_t Randomise::uniform<uint32_t>() {
|
||||
const uint32_t l = uniform<uint16_t>();
|
||||
const uint32_t h = uniform<uint16_t>();
|
||||
return h << 16 | l;
|
||||
}
|
||||
|
||||
template<>
|
||||
uint64_t Randomise::uniform<uint64_t>() {
|
||||
const uint64_t l = uniform<uint32_t>();
|
||||
const uint64_t h = uniform<uint32_t>();
|
||||
return h << 32 | l;
|
||||
}
|
||||
|
||||
template<>
|
||||
int8_t Randomise::uniform<int8_t>() { return uniform<uint8_t>(); }
|
||||
|
||||
template<>
|
||||
int16_t Randomise::uniform<int16_t>() { return uniform<uint16_t>(); }
|
||||
|
||||
template<>
|
||||
int32_t Randomise::uniform<int32_t>() { return uniform<uint32_t>(); }
|
||||
|
||||
template<>
|
||||
int64_t Randomise::uniform<int64_t>() { return uniform<uint64_t>(); }
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_RANDOMISE_H_
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_SNAPSHOT_H_
|
||||
#define TEST_SNAPSHOT_H_
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
/**
 * Allows capturing and retrieving snapshots of arbitrary blobs of memory,
 * blob size is based on compile time type information.
 *
 * Snapshots are keyed by the address of the object they were taken from and
 * hold a byte-wise copy (memcpy) of sizeof(E) bytes. Taking a snapshot of
 * the same object again replaces the previous one. All copies are released
 * when the Snapshot object is destroyed. Snapshot objects own these copies
 * and therefore cannot be copied or assigned.
 *
 * Usage:
 * void example() {
 *   Snapshot snapshot;
 *
 *   int foo = 4;
 *
 *   snapshot(foo);
 *
 *   foo = 10;
 *
 *   assert(snapshot.get(foo) == 4);     // Pass
 *   assert(snapshot.get(foo) == foo);   // Fail (4 != 10)
 *
 *   char bar[10][10];
 *   memset(bar, 3, sizeof(bar));
 *
 *   snapshot(bar);
 *
 *   memset(bar, 8, sizeof(bar));
 *
 *   assert(sum(bar) == 800);                 // Pass
 *   assert(sum(snapshot.get(bar)) == 300);   // Pass
 * }
 */
class Snapshot {
 public:
  Snapshot() {}

  virtual ~Snapshot() {
    for (snapshot_map_t::iterator it = snapshots_.begin();
         it != snapshots_.end(); ++it) {
      delete[] it->second;
    }
  }

  /**
   * Take new snapshot for object
   *
   * Any snapshot previously taken of the same object is discarded.
   */
  template<typename E>
  void take(const E &e) {
    const void *const key = reinterpret_cast<const void*>(&e);

    // Build the new copy before touching the map, so that a failed
    // allocation cannot leave the map pointing at a freed buffer.
    char *const buf = new char[sizeof(E)];

    memcpy(buf, &e, sizeof(E));

    // operator[] yields the existing entry, or a fresh null entry for a new
    // key; deleting a null pointer is a no-op.
    const char *&slot = snapshots_[key];

    delete[] slot;

    slot = buf;
  }

  /**
   * Same as 'take'
   */
  template<typename E>
  void operator()(const E &e) {
    take(e);
  }

  /**
   * Retrieve last snapshot for object
   *
   * A snapshot of 'e' must have been taken before (asserted). The returned
   * reference refers to the stored copy; it is invalidated when a new
   * snapshot of the same object is taken or this Snapshot is destroyed.
   */
  template<typename E>
  const E& get(const E &e) const {
    const void *const key = reinterpret_cast<const void*>(&e);

    snapshot_map_t::const_iterator it = snapshots_.find(key);

    assert(it != snapshots_.end());

    return *reinterpret_cast<const E*>(it->second);
  }

 private:
  // Maps the address of the original object to its stored copy.
  typedef std::map<const void*, const char*> snapshot_map_t;

  // Not copyable: a copy would share the raw buffers with the original and
  // both destructors would release them. Declared but not defined.
  Snapshot(const Snapshot&);
  Snapshot& operator=(const Snapshot&);

  snapshot_map_t snapshots_;
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_SNAPSHOT_H_
|
|
@ -178,6 +178,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc
|
|||
ifeq ($(CONFIG_EXT_INTER),yes)
|
||||
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
|
||||
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += blend_mask6_test.cc
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <assert.h>
|
||||
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
@ -410,119 +411,6 @@ const uint8_t *vp10_get_soft_mask(int wedge_index,
|
|||
return mask;
|
||||
}
|
||||
|
||||
static void build_masked_compound(uint8_t *dst, int dst_stride,
|
||||
uint8_t *dst1, int dst1_stride,
|
||||
uint8_t *dst2, int dst2_stride,
|
||||
const uint8_t *mask,
|
||||
int h, int w, int subh, int subw) {
|
||||
int i, j;
|
||||
if (subw == 0 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = mask[i * MASK_MASTER_STRIDE + j];
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
|
||||
}
|
||||
} else if (subw == 1 && subh == 1) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j + 1)] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j + 1)] + 2) >> 2;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
} else if (subw == 1 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[i * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[i * MASK_MASTER_STRIDE + (2 * j + 1)] + 1) >> 1;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[(2 * i) * MASK_MASTER_STRIDE + j] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + j] + 1) >> 1;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride,
|
||||
uint8_t *dst1_8, int dst1_stride,
|
||||
uint8_t *dst2_8, int dst2_stride,
|
||||
const uint8_t *mask,
|
||||
int h, int w, int subh, int subw) {
|
||||
int i, j;
|
||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
|
||||
uint16_t *dst1 = CONVERT_TO_SHORTPTR(dst1_8);
|
||||
uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
|
||||
if (subw == 0 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = mask[i * MASK_MASTER_STRIDE + j];
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
} else if (subw == 1 && subh == 1) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j + 1)] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j + 1)] + 2) >> 2;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
} else if (subw == 1 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[i * MASK_MASTER_STRIDE + (2 * j)] +
|
||||
mask[i * MASK_MASTER_STRIDE + (2 * j + 1)] + 1) >> 1;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
int m = (mask[(2 * i) * MASK_MASTER_STRIDE + j] +
|
||||
mask[(2 * i + 1) * MASK_MASTER_STRIDE + j] + 1) >> 1;
|
||||
dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
|
||||
dst2[i * dst2_stride + j] *
|
||||
((1 << WEDGE_WEIGHT_BITS) - m) +
|
||||
(1 << (WEDGE_WEIGHT_BITS - 1))) >>
|
||||
WEDGE_WEIGHT_BITS;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_SUPERTX
|
||||
static void build_masked_compound_wedge_extend(
|
||||
|
@ -537,9 +425,11 @@ static void build_masked_compound_wedge_extend(
|
|||
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
|
||||
const uint8_t *mask = vp10_get_soft_mask(
|
||||
wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y);
|
||||
build_masked_compound(dst, dst_stride,
|
||||
dst, dst_stride, dst2, dst2_stride, mask,
|
||||
h, w, subh, subw);
|
||||
vpx_blend_mask6(dst, dst_stride,
|
||||
dst, dst_stride,
|
||||
dst2, dst2_stride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
h, w, subh, subw);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -549,14 +439,16 @@ static void build_masked_compound_wedge_extend_highbd(
|
|||
int wedge_index, int wedge_sign,
|
||||
BLOCK_SIZE sb_type,
|
||||
int wedge_offset_x, int wedge_offset_y,
|
||||
int h, int w) {
|
||||
int h, int w, int bd) {
|
||||
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
|
||||
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
|
||||
const uint8_t *mask = vp10_get_soft_mask(
|
||||
wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y);
|
||||
build_masked_compound_highbd(dst_8, dst_stride,
|
||||
dst_8, dst_stride, dst2_8, dst2_stride, mask,
|
||||
h, w, subh, subw);
|
||||
vpx_highbd_blend_mask6(dst_8, dst_stride,
|
||||
dst_8, dst_stride,
|
||||
dst2_8, dst2_stride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
h, w, subh, subw, bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
|
@ -573,9 +465,11 @@ static void build_masked_compound_wedge(uint8_t *dst, int dst_stride,
|
|||
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
|
||||
const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
|
||||
sb_type, 0, 0);
|
||||
build_masked_compound(dst, dst_stride,
|
||||
dst, dst_stride, dst2, dst2_stride, mask,
|
||||
h, w, subh, subw);
|
||||
vpx_blend_mask6(dst, dst_stride,
|
||||
dst, dst_stride,
|
||||
dst2, dst2_stride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
h, w, subh, subw);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -583,16 +477,18 @@ static void build_masked_compound_wedge_highbd(uint8_t *dst_8, int dst_stride,
|
|||
uint8_t *dst2_8, int dst2_stride,
|
||||
int wedge_index, int wedge_sign,
|
||||
BLOCK_SIZE sb_type,
|
||||
int h, int w) {
|
||||
int h, int w, int bd) {
|
||||
// Derive subsampling from h and w passed in. May be refactored to
|
||||
// pass in subsampling factors directly.
|
||||
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
|
||||
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
|
||||
const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
|
||||
sb_type, 0, 0);
|
||||
build_masked_compound_highbd(dst_8, dst_stride,
|
||||
dst_8, dst_stride, dst2_8, dst2_stride, mask,
|
||||
h, w, subh, subw);
|
||||
vpx_highbd_blend_mask6(dst_8, dst_stride,
|
||||
dst_8, dst_stride,
|
||||
dst2_8, dst2_stride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
h, w, subh, subw, bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // CONFIG_SUPERTX
|
||||
|
@ -641,7 +537,7 @@ void vp10_make_masked_inter_predictor(
|
|||
mi->mbmi.interinter_wedge_index,
|
||||
mi->mbmi.interinter_wedge_sign,
|
||||
mi->mbmi.sb_type,
|
||||
wedge_offset_x, wedge_offset_y, h, w);
|
||||
wedge_offset_x, wedge_offset_y, h, w, xd->cur_buf->bit_depth);
|
||||
else
|
||||
build_masked_compound_wedge_extend(
|
||||
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
|
||||
|
@ -655,7 +551,7 @@ void vp10_make_masked_inter_predictor(
|
|||
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
|
||||
mi->mbmi.interinter_wedge_index,
|
||||
mi->mbmi.interinter_wedge_sign,
|
||||
mi->mbmi.sb_type, h, w);
|
||||
mi->mbmi.sb_type, h, w, xd->cur_buf->bit_depth);
|
||||
else
|
||||
build_masked_compound_wedge(
|
||||
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
|
||||
|
@ -1872,10 +1768,11 @@ static void combine_interintra(INTERINTRA_MODE mode,
|
|||
bsize, 0, 0);
|
||||
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
|
||||
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
|
||||
build_masked_compound(comppred, compstride,
|
||||
intrapred, intrastride,
|
||||
interpred, interstride, mask,
|
||||
bh, bw, subh, subw);
|
||||
vpx_blend_mask6(comppred, compstride,
|
||||
intrapred, intrastride,
|
||||
interpred, interstride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
bh, bw, subh, subw);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1995,7 +1892,6 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode,
|
|||
uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
|
||||
uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
|
||||
uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
|
||||
(void) bd;
|
||||
|
||||
if (use_wedge_interintra) {
|
||||
if (is_interintra_wedge_used(bsize)) {
|
||||
|
@ -2003,10 +1899,11 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode,
|
|||
bsize, 0, 0);
|
||||
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
|
||||
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
|
||||
build_masked_compound_highbd(comppred8, compstride,
|
||||
intrapred8, intrastride,
|
||||
interpred8, interstride, mask,
|
||||
bh, bw, subh, subw);
|
||||
vpx_highbd_blend_mask6(comppred8, compstride,
|
||||
intrapred8, intrastride,
|
||||
interpred8, interstride,
|
||||
mask, MASK_MASTER_STRIDE,
|
||||
bh, bw, subh, subw, bd);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -2460,7 +2357,7 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
|
|||
mi->mbmi.interinter_wedge_index,
|
||||
mi->mbmi.interinter_wedge_sign,
|
||||
mi->mbmi.sb_type,
|
||||
wedge_offset_x, wedge_offset_y, h, w);
|
||||
wedge_offset_x, wedge_offset_y, h, w, xd->cur_buf->bit_depth);
|
||||
} else {
|
||||
build_masked_compound_wedge_extend(
|
||||
dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
|
||||
|
@ -2484,7 +2381,8 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
|
|||
MAX_SB_SIZE,
|
||||
mi->mbmi.interinter_wedge_index,
|
||||
mi->mbmi.interinter_wedge_sign,
|
||||
mi->mbmi.sb_type, h, w);
|
||||
mi->mbmi.sb_type, h, w,
|
||||
xd->cur_buf->bit_depth);
|
||||
else
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
build_masked_compound_wedge(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#define MASK_BITS 6
|
||||
|
||||
void vpx_blend_mask6_c(uint8_t *dst, uint32_t dst_stride,
|
||||
uint8_t *src0, uint32_t src0_stride,
|
||||
uint8_t *src1, uint32_t src1_stride,
|
||||
const uint8_t *mask, uint32_t mask_stride,
|
||||
int h, int w, int subh, int subw) {
|
||||
int i, j;
|
||||
|
||||
assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
|
||||
assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
|
||||
|
||||
assert(h >= 4);
|
||||
assert(w >= 4);
|
||||
assert(IS_POWER_OF_TWO(h));
|
||||
assert(IS_POWER_OF_TWO(w));
|
||||
|
||||
if (subw == 0 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 = mask[i * mask_stride + j];
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else if (subw == 1 && subh == 1) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + (2 * j)] +
|
||||
mask[(2 * i + 1) * mask_stride + (2 * j)] +
|
||||
mask[(2 * i) * mask_stride + (2 * j + 1)] +
|
||||
mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
|
||||
2);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else if (subw == 1 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[i * mask_stride + (2 * j)] +
|
||||
mask[i * mask_stride + (2 * j + 1)], 1);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + j] +
|
||||
mask[(2 * i + 1) * mask_stride + j], 1);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vpx_highbd_blend_mask6_c(uint8_t *dst_8, uint32_t dst_stride,
|
||||
uint8_t *src0_8, uint32_t src0_stride,
|
||||
uint8_t *src1_8, uint32_t src1_stride,
|
||||
const uint8_t *mask, uint32_t mask_stride,
|
||||
int h, int w, int subh, int subw, int bd) {
|
||||
int i, j;
|
||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
|
||||
uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
|
||||
uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
|
||||
|
||||
assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
|
||||
assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
|
||||
|
||||
assert(h >= 4);
|
||||
assert(w >= 4);
|
||||
assert(IS_POWER_OF_TWO(h));
|
||||
assert(IS_POWER_OF_TWO(w));
|
||||
|
||||
assert(bd == 8 || bd == 10 || bd == 12);
|
||||
|
||||
if (subw == 0 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 = mask[i * mask_stride + j];
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else if (subw == 1 && subh == 1) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + (2 * j)] +
|
||||
mask[(2 * i + 1) * mask_stride + (2 * j)] +
|
||||
mask[(2 * i) * mask_stride + (2 * j + 1)] +
|
||||
mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
|
||||
2);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else if (subw == 1 && subh == 0) {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[i * mask_stride + (2 * j)] +
|
||||
mask[i * mask_stride + (2 * j + 1)], 1);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < h; ++i)
|
||||
for (j = 0; j < w; ++j) {
|
||||
const int m0 =
|
||||
ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + j] +
|
||||
mask[(2 * i + 1) * mask_stride + j], 1);
|
||||
const int m1 = ((1 << MASK_BITS) - m0);
|
||||
dst[i * dst_stride + j] =
|
||||
ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
|
||||
src1[i * src1_stride + j] * m1, MASK_BITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
@ -65,6 +65,15 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c
|
|||
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h
|
||||
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c
|
||||
|
||||
# inter predictions
|
||||
|
||||
ifeq ($(CONFIG_VP10),yes)
|
||||
ifeq ($(CONFIG_EXT_INTER),yes)
|
||||
DSP_SRCS-yes += blend_mask6.c
|
||||
DSP_SRCS-$(HAVE_SSE4_1) += x86/blend_mask6_sse4.c
|
||||
endif #CONFIG_EXT_INTER
|
||||
endif #CONFIG_VP10
|
||||
|
||||
# interpolation filters
|
||||
DSP_SRCS-yes += vpx_convolve.c
|
||||
DSP_SRCS-yes += vpx_convolve.h
|
||||
|
|
|
@ -32,6 +32,8 @@ extern "C" {
|
|||
|
||||
#define IMPLIES(a, b) (!(a) || (b)) // Logical 'a implies b' (or 'a -> b')
|
||||
|
||||
// True when x has at most one bit set. Note that this also holds for x == 0,
// so callers that must reject zero need a separate check. The argument is
// expanded twice, so it should be free of side effects.
#define IS_POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)
|
||||
|
||||
// These can be used to give a hint about branch outcomes.
|
||||
// This can have an effect, even if your target processor has a
|
||||
// good branch predictor, as these hints can affect basic block
|
||||
|
|
|
@ -1358,10 +1358,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
}
|
||||
} # CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
|
||||
#
|
||||
# Masked Variance / Masked Subpixel Variance
|
||||
#
|
||||
if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
|
||||
foreach (@block_sizes) {
|
||||
($w, $h) = @$_;
|
||||
add_proto qw/unsigned int/, "vpx_masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
|
||||
|
@ -1381,6 +1381,14 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
add_proto qw/void vpx_blend_mask6/, "uint8_t *dst, uint32_t dst_stride, uint8_t *src0, uint32_t src0_stride, uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
|
||||
specialize "vpx_blend_mask6", qw/sse4_1/;
|
||||
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vpx_highbd_blend_mask6/, "uint8_t *dst, uint32_t dst_stride, uint8_t *src0, uint32_t src0_stride, uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
|
||||
specialize "vpx_highbd_blend_mask6", qw/sse4_1/;
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче