vp9_reconintra_neon: add d45 16x16

~90% faster over 20M pixels

Change-Id: I92d80f66e91e0a870a672cfb5dd29bf1a17cb11a
This commit is contained in:
James Zern 2015-06-22 20:57:14 -07:00
Родитель c8b9658ecc
Коммит 9db1f24c47
3 изменённых файлов: 20 добавлений и 3 удалений

Просмотреть файл

@ -316,8 +316,8 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
vp9_dc_left_predictor_16x16_neon,
vp9_dc_top_predictor_16x16_neon,
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
vp9_tm_predictor_16x16_neon)
vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
#if HAVE_MSA

Просмотреть файл

@ -358,6 +358,23 @@ void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
vst1_u8(dst + i * stride, row);
}
// D45 intra predictor for a 16x16 block, NEON version.
//
// dst    - destination block; 16 bytes are written on each of 16 rows.
// stride - distance in bytes between consecutive rows of dst.
// above  - row of pixels above the block; only above[0..15] are read.
// left   - unused by this predictor.
//
// The first output row is a 3-tap smoothing of the above row:
//   vrhadd(vhadd(above[x], above[x + 2]), above[x + 1])
// where positions past above[15] are replaced by above[15]. Every following
// row is the previous row shifted left by one pixel, with above[15] shifted
// in on the right.
void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
  const uint8x16_t top = vld1q_u8(above);
  // All 16 lanes hold above[15]; used as padding for every shift below.
  const uint8x16_t pad = vld1q_dup_u8(above + 15);
  const uint8x16_t top_plus1 = vextq_u8(top, pad, 1);
  const uint8x16_t top_plus2 = vextq_u8(top, pad, 2);
  // Halving add of the outer taps, then rounding halving add with the centre.
  uint8x16_t pred = vrhaddq_u8(vhaddq_u8(top, top_plus2), top_plus1);
  uint8_t *out = dst;
  int rows_left;
  (void)left;
  // Rows 0..14: store, then slide the row one pixel to the left.
  for (rows_left = 15; rows_left > 0; --rows_left) {
    vst1q_u8(out, pred);
    out += stride;
    pred = vextq_u8(pred, pad, 1);
  }
  // Row 15: final store, no further shift needed.
  vst1q_u8(out, pred);
}
// -----------------------------------------------------------------------------
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,

Просмотреть файл

@ -138,7 +138,7 @@ add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";