Merge "Add vp9_avg_4x4_neon and the unit test."
This commit is contained in:
Коммит
d9bba21306
|
@ -372,7 +372,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||
::testing::Values(
|
||||
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
|
||||
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
|
||||
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
|
||||
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon),
|
||||
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_neon),
|
||||
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_neon),
|
||||
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_neon)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, IntProRowTest, ::testing::Values(
|
||||
|
|
|
@ -198,7 +198,7 @@ add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
|
|||
specialize qw/vp9_avg_8x8 sse2 neon msa/;
|
||||
|
||||
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
|
||||
specialize qw/vp9_avg_4x4 sse2 msa/;
|
||||
specialize qw/vp9_avg_4x4 sse2 neon msa/;
|
||||
|
||||
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
|
||||
specialize qw/vp9_minmax_8x8 sse2/;
|
||||
|
|
|
@ -24,6 +24,18 @@ static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
|
|||
return vget_lane_u32(c, 0);
|
||||
}
|
||||
|
||||
unsigned int vp9_avg_4x4_neon(const uint8_t *s, int p) {
|
||||
uint16x8_t v_sum;
|
||||
uint32x2_t v_s0 = vdup_n_u32(0);
|
||||
uint32x2_t v_s1 = vdup_n_u32(0);
|
||||
v_s0 = vld1_lane_u32((const uint32_t *)s, v_s0, 0);
|
||||
v_s0 = vld1_lane_u32((const uint32_t *)(s + p), v_s0, 1);
|
||||
v_s1 = vld1_lane_u32((const uint32_t *)(s + 2 * p), v_s1, 0);
|
||||
v_s1 = vld1_lane_u32((const uint32_t *)(s + 3 * p), v_s1, 1);
|
||||
v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1));
|
||||
return (horizontal_add_u16x8(v_sum) + 8) >> 4;
|
||||
}
|
||||
|
||||
unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
|
||||
uint8x8_t v_s0 = vld1_u8(s);
|
||||
const uint8x8_t v_s1 = vld1_u8(s + p);
|
||||
|
|
Загрузка…
Ссылка в новой задаче