Merge "vpx_minmax_8x8_neon and test"

This commit is contained in:
Johann Koenig 2016-04-25 18:58:29 +00:00 коммит произвёл Gerrit Code Review
Родитель bd3c874cac 2f5840de3e
Коммит 520055bd1a
4 изменённых файлов: 191 добавлений и 1 удалений

132
test/minmax_test.cc Normal file
Просмотреть файл

@ -0,0 +1,132 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
namespace {
using ::libvpx_test::ACMRandom;
// Signature shared by every minmax implementation exercised by these tests:
// two 8-bit source pointers, each with its own row stride, and two
// out-parameters that receive the minimum and maximum result.
typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int *min, int *max);
// Test fixture parameterized on the minmax implementation under test.
class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
 public:
  // Reseeds the random generator with the deterministic seed and picks up the
  // function pointer supplied by the test instantiation.
  virtual void SetUp() {
    rnd_.Reset(ACMRandom::DeterministicSeed());
    mm_func_ = GetParam();
  }

 protected:
  MinMaxFunc mm_func_;  // Implementation being tested.
  ACMRandom rnd_;       // Source of pseudo-random pixel values.
};
// Scalar reference implementation. Walks the 8x8 block row by row and reports
// the smallest and largest absolute difference between corresponding pixels
// of |a| and |b|.
//   a, a_stride: first block and the distance (in bytes) between its rows.
//   b, b_stride: second block and the distance (in bytes) between its rows.
//   min_ret, max_ret: receive the minimum / maximum absolute difference.
void reference_minmax(const uint8_t *a, int a_stride,
                      const uint8_t *b, int b_stride,
                      int *min_ret, int *max_ret) {
  int lowest = 255;  // Largest possible difference of two 8-bit values.
  int highest = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *const row_a = a + row * a_stride;
    const uint8_t *const row_b = b + row * b_stride;
    for (int col = 0; col < 8; ++col) {
      const int delta = abs(row_a[col] - row_b[col]);
      lowest = (delta < lowest) ? delta : lowest;
      highest = (delta > highest) ? delta : highest;
    }
  }
  *min_ret = lowest;
  *max_ret = highest;
}
// Every pixel differs by 255 except one, which is placed at each of the 64
// positions in turn and differs by its own index. That index must be reported
// as the minimum while the maximum stays at 255.
TEST_P(MinMaxTest, MinValue) {
  for (int i = 0; i < 64; ++i) {
    uint8_t zeros[64];
    uint8_t probe[64];
    memset(probe, 255, sizeof(probe));
    memset(zeros, 0, sizeof(zeros));
    probe[i] = i;  // The single pixel whose difference is below 255.

    int min, max;
    ASM_REGISTER_STATE_CHECK(mm_func_(zeros, 8, probe, 8, &min, &max));
    EXPECT_EQ(255, max);
    EXPECT_EQ(i, min);
  }
}
// Both blocks are all zero except one pixel, placed at each of the 64
// positions in turn, whose difference equals its index. That index must be
// reported as the maximum while the minimum stays at 0.
TEST_P(MinMaxTest, MaxValue) {
  for (int i = 0; i < 64; ++i) {
    uint8_t zeros[64];
    uint8_t probe[64];
    memset(probe, 0, sizeof(probe));
    memset(zeros, 0, sizeof(zeros));
    probe[i] = i;  // The single pixel with a non-zero difference.

    int min, max;
    ASM_REGISTER_STATE_CHECK(mm_func_(zeros, 8, probe, 8, &min, &max));
    EXPECT_EQ(i, max);
    EXPECT_EQ(0, min);
  }
}
// Feeds one pair of random, tightly packed 8x8 blocks to both the scalar
// reference and the implementation under test and requires identical results.
TEST_P(MinMaxTest, CompareReference) {
  const int kNumPixels = 64;
  uint8_t src[kNumPixels];
  uint8_t ref[kNumPixels];
  for (int px = 0; px < kNumPixels; ++px) {
    src[px] = rnd_.Rand8();
    ref[px] = rnd_.Rand8();
  }

  int min_ref, max_ref;
  reference_minmax(src, 8, ref, 8, &min_ref, &max_ref);

  int min, max;
  ASM_REGISTER_STATE_CHECK(mm_func_(src, 8, ref, 8, &min, &max));

  EXPECT_EQ(max_ref, max);
  EXPECT_EQ(min_ref, min);
}
// Compares the implementation under test against the scalar reference for
// every combination of row strides from 8 to 64 (in steps of 8), applied
// independently to each input. The buffers hold 8 rows at the largest stride,
// so every 8x8 window read stays inside them.
TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
  uint8_t a[8 * 64], b[8 * 64];
  for (int i = 0; i < 8 * 64; i++) {
    a[i] = rnd_.Rand8();
    b[i] = rnd_.Rand8();
  }
  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
      int min_ref, max_ref, min, max;
      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
      // The stride pair is appended so a failure identifies the combination.
      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
    }
  }
}
// Instantiate the suite once per available implementation. The C version is
// always included; the SSE2 and NEON versions are included only when the
// corresponding HAVE_* macro evaluates to non-zero.
INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_sse2));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_neon));
#endif
} // namespace

Просмотреть файл

@ -144,6 +144,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
# Test sources appended to the list keyed by the value of CONFIG_VP9_ENCODER.
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc

Просмотреть файл

@ -197,3 +197,60 @@ int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
return s - ((t * t) >> shift_factor);
}
}
// Computes the minimum and maximum absolute difference between corresponding
// pixels of two 8x8 blocks of 8-bit samples.
//   a, a_stride: first block and the distance (in bytes) between its rows.
//   b, b_stride: second block and the distance (in bytes) between its rows.
//   min, max:    receive the smallest / largest absolute difference found.
void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int *min, int *max) {
// Load and concatenate.
// Each 128-bit vector holds two consecutive 8-pixel rows.
const uint8x16_t a01 = vcombine_u8(vld1_u8(a),
vld1_u8(a + a_stride));
const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride),
vld1_u8(a + 3 * a_stride));
const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride),
vld1_u8(a + 5 * a_stride));
const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride),
vld1_u8(a + 7 * a_stride));
const uint8x16_t b01 = vcombine_u8(vld1_u8(b),
vld1_u8(b + b_stride));
const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride),
vld1_u8(b + 3 * b_stride));
const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride),
vld1_u8(b + 5 * b_stride));
const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride),
vld1_u8(b + 7 * b_stride));
// Absolute difference.
const uint8x16_t ab01_diff = vabdq_u8(a01, b01);
const uint8x16_t ab23_diff = vabdq_u8(a23, b23);
const uint8x16_t ab45_diff = vabdq_u8(a45, b45);
const uint8x16_t ab67_diff = vabdq_u8(a67, b67);
// Lane-wise max and min values between the Q vectors, reducing the four
// difference vectors to a single max vector and a single min vector.
const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff);
const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff);
const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff);
const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff);
const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
// Split to D and start doing pairwise.
uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));
uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));
// Enough runs of vpmax/min propagate the max/min values to every position.
// Three pairwise rounds with both operands equal reduce the 8 lanes
// 8 -> 4 -> 2 -> 1 distinct values.
ab_max = vpmax_u8(ab_max, ab_max);
ab_min = vpmin_u8(ab_min, ab_min);
ab_max = vpmax_u8(ab_max, ab_max);
ab_min = vpmin_u8(ab_min, ab_min);
ab_max = vpmax_u8(ab_max, ab_max);
ab_min = vpmin_u8(ab_min, ab_min);
// Zero both outputs first: the lane stores below write only one byte of each
// int, so the remaining bytes must already be clear.
*min = *max = 0; // Clear high bits
// Store directly to avoid costly neon->gpr transfer.
// NOTE(review): the byte is written at the lowest address of each int, which
// yields the intended value only when that byte is the least significant one
// (little-endian layout) -- confirm for any big-endian target.
vst1_lane_u8((uint8_t *)max, ab_max, 0);
vst1_lane_u8((uint8_t *)min, ab_min, 0);
}

Просмотреть файл

@ -1014,7 +1014,7 @@ if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCO
specialize qw/vpx_avg_4x4 sse2 neon msa/;
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vpx_minmax_8x8 sse2/;
specialize qw/vpx_minmax_8x8 sse2 neon/;
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";