Merge changes Iaf8cbe95,I6748183d,I2a49811d
* changes: add vp9_satd_neon; fix vp9_satd_sse2; vp9_satd: return an int
This commit is contained in:
Коммит
fd51d90159
|
@ -194,6 +194,48 @@ class IntProColTest
|
|||
int16_t sum_c_;
|
||||
};
|
||||
|
||||
// Signature shared by the vp9_satd implementations exercised below: takes a
// coefficient buffer and its length, and returns an int.
typedef int (*SatdFunc)(const int16_t *coeffs, int length);
|
||||
// Test parameter: (number of coefficients, function under test).
typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
|
||||
|
||||
class SatdTest
|
||||
: public ::testing::Test,
|
||||
public ::testing::WithParamInterface<SatdTestParam> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
satd_size_ = GET_PARAM(0);
|
||||
satd_func_ = GET_PARAM(1);
|
||||
rnd_.Reset(ACMRandom::DeterministicSeed());
|
||||
src_ = reinterpret_cast<int16_t*>(
|
||||
vpx_memalign(16, sizeof(*src_) * satd_size_));
|
||||
ASSERT_TRUE(src_ != NULL);
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
vpx_free(src_);
|
||||
}
|
||||
|
||||
void FillConstant(const int16_t val) {
|
||||
for (int i = 0; i < satd_size_; ++i) src_[i] = val;
|
||||
}
|
||||
|
||||
void FillRandom() {
|
||||
for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
|
||||
}
|
||||
|
||||
void Check(const int expected) {
|
||||
int total;
|
||||
ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
|
||||
EXPECT_EQ(expected, total);
|
||||
}
|
||||
|
||||
int satd_size_;
|
||||
|
||||
private:
|
||||
int16_t *src_;
|
||||
SatdFunc satd_func_;
|
||||
ACMRandom rnd_;
|
||||
};
|
||||
|
||||
uint8_t* AverageTestBase::source_data_ = NULL;
|
||||
|
||||
|
@ -246,6 +288,36 @@ TEST_P(IntProColTest, Random) {
|
|||
RunComparison();
|
||||
}
|
||||
|
||||
|
||||
// All coefficients at -32640: every one contributes 32640 to the sum of
// absolute values.
TEST_P(SatdTest, MinValue) {
  const int kMin = -32640;
  FillConstant(kMin);
  Check(-kMin * satd_size_);
}
|
||||
|
||||
// All coefficients at 32640: the expected sum is 32640 per coefficient.
TEST_P(SatdTest, MaxValue) {
  const int kMax = 32640;
  FillConstant(kMax);
  Check(kMax * satd_size_);
}
|
||||
|
||||
// Random coefficients: the expected sum is a fixed reference value per
// supported buffer size; any other size fails the test immediately.
TEST_P(SatdTest, Random) {
  int expected;
  if (satd_size_ == 16) {
    expected = 205298;
  } else if (satd_size_ == 64) {
    expected = 1113950;
  } else if (satd_size_ == 256) {
    expected = 4268415;
  } else if (satd_size_ == 1024) {
    expected = 16954082;
  } else {
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(expected);
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
@ -254,6 +326,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
|
||||
make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
|
||||
|
||||
// Runs SatdTest against vp9_satd_c for 16, 64, 256 and 1024 coefficients.
INSTANTIATE_TEST_CASE_P(C, SatdTest,
                        ::testing::Values(make_tuple(16, &vp9_satd_c),
                                          make_tuple(64, &vp9_satd_c),
                                          make_tuple(256, &vp9_satd_c),
                                          make_tuple(1024, &vp9_satd_c)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, AverageTest,
|
||||
|
@ -276,6 +356,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
|
||||
|
||||
// Runs SatdTest against vp9_satd_sse2 for 16, 64, 256 and 1024 coefficients.
INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
                        ::testing::Values(make_tuple(16, &vp9_satd_sse2),
                                          make_tuple(64, &vp9_satd_sse2),
                                          make_tuple(256, &vp9_satd_sse2),
                                          make_tuple(1024, &vp9_satd_sse2)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
|
@ -297,6 +385,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
|
||||
make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
|
||||
make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
|
||||
|
||||
// Runs SatdTest against vp9_satd_neon for 16, 64, 256 and 1024 coefficients.
INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                        ::testing::Values(make_tuple(16, &vp9_satd_neon),
                                          make_tuple(64, &vp9_satd_neon),
                                          make_tuple(256, &vp9_satd_neon),
                                          make_tuple(1024, &vp9_satd_neon)));
|
||||
#endif
|
||||
|
||||
#if HAVE_MSA
|
||||
|
|
|
@ -209,8 +209,8 @@ specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
|
|||
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
|
||||
specialize qw/vp9_hadamard_16x16 sse2/;
|
||||
|
||||
# vp9_satd returns int rather than int16_t: the sum of absolute coefficient
# values can reach 32640 * 1024, which does not fit in 16 bits.
# Specialized for sse2 and neon.
add_proto qw/int vp9_satd/, "const int16_t *coeff, int length";
specialize qw/vp9_satd sse2 neon/;
|
||||
|
||||
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
|
||||
specialize qw/vp9_int_pro_row sse2 neon/;
|
||||
|
|
|
@ -50,6 +50,33 @@ unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
|
|||
return (horizontal_add_u16x8(v_sum) + 32) >> 6;
|
||||
}
|
||||
|
||||
// coeff: 16 bits, dynamic range [-32640, 32640].
|
||||
// length: value range {16, 64, 256, 1024}.
|
||||
int vp9_satd_neon(const int16_t *coeff, int length) {
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4_t accum = vdupq_n_s32(0);
|
||||
|
||||
do {
|
||||
const int16x8_t src0 = vld1q_s16(coeff);
|
||||
const int16x8_t src8 = vld1q_s16(coeff + 8);
|
||||
accum = vabal_s16(accum, vget_low_s16(src0), zero);
|
||||
accum = vabal_s16(accum, vget_high_s16(src0), zero);
|
||||
accum = vabal_s16(accum, vget_low_s16(src8), zero);
|
||||
accum = vabal_s16(accum, vget_high_s16(src8), zero);
|
||||
length -= 16;
|
||||
coeff += 16;
|
||||
} while (length != 0);
|
||||
|
||||
{
|
||||
// satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
|
||||
const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'.
|
||||
const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
|
||||
vreinterpret_s32_s64(vget_high_s64(s0)));
|
||||
const int satd = vget_lane_s32(s1, 0);
|
||||
return satd;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
|
||||
const int ref_stride, const int height) {
|
||||
int i;
|
||||
|
|
|
@ -117,14 +117,14 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
|
|||
|
||||
// Returns the sum of the absolute values of the first 'length' coefficients.
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
// The return type is int: the total can reach 32640 * 1024, which needs
// 26 bits and would be truncated by an int16_t return.
int vp9_satd_c(const int16_t *coeff, int length) {
  int i;
  int satd = 0;
  for (i = 0; i < length; ++i)
    satd += abs(coeff[i]);

  // satd: 26 bits, dynamic range [0, 32640 * 1024]
  return satd;
}
|
||||
|
||||
// Integer projection onto row vectors.
|
||||
|
|
|
@ -673,7 +673,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
|
|||
if (*eob == 1)
|
||||
*rate += (int)abs(qcoeff[0]);
|
||||
else if (*eob > 1)
|
||||
*rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
|
||||
*rate += vp9_satd((const int16_t *)qcoeff, step << 4);
|
||||
|
||||
*dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
|
||||
}
|
||||
|
|
|
@ -283,31 +283,30 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
|
|||
}
|
||||
}
|
||||
|
||||
int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
|
||||
int vp9_satd_sse2(const int16_t *coeff, int length) {
|
||||
int i;
|
||||
__m128i sum = _mm_load_si128((const __m128i *)coeff);
|
||||
__m128i sign = _mm_srai_epi16(sum, 15);
|
||||
__m128i val = _mm_xor_si128(sum, sign);
|
||||
sum = _mm_sub_epi16(val, sign);
|
||||
coeff += 8;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i accum = zero;
|
||||
|
||||
for (i = 8; i < length; i += 8) {
|
||||
__m128i src_line = _mm_load_si128((const __m128i *)coeff);
|
||||
sign = _mm_srai_epi16(src_line, 15);
|
||||
val = _mm_xor_si128(src_line, sign);
|
||||
val = _mm_sub_epi16(val, sign);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
for (i = 0; i < length; i += 8) {
|
||||
const __m128i src_line = _mm_load_si128((const __m128i *)coeff);
|
||||
const __m128i inv = _mm_sub_epi16(zero, src_line);
|
||||
const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line)
|
||||
const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);
|
||||
const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero);
|
||||
const __m128i sum = _mm_add_epi32(abs_lo, abs_hi);
|
||||
accum = _mm_add_epi32(accum, sum);
|
||||
coeff += 8;
|
||||
}
|
||||
|
||||
val = _mm_srli_si128(sum, 8);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
val = _mm_srli_epi64(sum, 32);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
val = _mm_srli_epi32(sum, 16);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
{ // cascading summation of accum
|
||||
__m128i hi = _mm_srli_si128(accum, 8);
|
||||
accum = _mm_add_epi32(accum, hi);
|
||||
hi = _mm_srli_epi64(accum, 32);
|
||||
accum = _mm_add_epi32(accum, hi);
|
||||
}
|
||||
|
||||
return _mm_extract_epi16(sum, 0);
|
||||
return _mm_cvtsi128_si32(accum);
|
||||
}
|
||||
|
||||
void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
|
|
Загрузка…
Ссылка в новой задаче