vp9_reconintra_neon: add d45 16x16
~90% faster over 20M pixels

Change-Id: I92d80f66e91e0a870a672cfb5dd29bf1a17cb11a
This commit is contained in:
Родитель
c8b9658ecc
Коммит
9db1f24c47
|
@ -316,8 +316,8 @@ INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
|
|||
vp9_dc_left_predictor_16x16_neon,
|
||||
vp9_dc_top_predictor_16x16_neon,
|
||||
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
|
||||
vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_tm_predictor_16x16_neon)
|
||||
vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
|
||||
NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA
|
||||
|
|
|
@ -358,6 +358,23 @@ void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
|
|||
vst1_u8(dst + i * stride, row);
|
||||
}
|
||||
|
||||
// Writes a 16x16 block of D45 (45-degree diagonal) intra prediction to dst,
// derived solely from the 16 pixels in above[0..15].
//
//   dst    - top-left byte of the output block; 16 bytes are stored per row.
//   stride - offset between the starts of consecutive output rows.
//   above  - pixel row above the block; only above[0..15] is read.
//   left   - not used by this predictor.
void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
  const uint8x16_t A0 = vld1q_u8(above);  // top row
  // above[15] replicated into every lane; it pads everything shifted in from
  // beyond the end of the top row.
  const uint8x16_t above_right = vld1q_dup_u8(above + 15);
  // Top row advanced by one and by two pixels, padded with above[15].
  const uint8x16_t A1 = vextq_u8(A0, above_right, 1);
  const uint8x16_t A2 = vextq_u8(A0, above_right, 2);
  // Two halving adds: (((A0 + A2) >> 1) + A1 + 1) >> 1 per lane, without
  // widening to 16 bits.
  const uint8x16_t avg1 = vhaddq_u8(A0, A2);
  uint8x16_t row = vrhaddq_u8(avg1, A1);
  int i;
  (void)left;
  // Each output row is the previous row moved left by one pixel, with
  // above[15] entering at the right edge.
  for (i = 0; i < 15; ++i) {
    vst1q_u8(dst + i * stride, row);
    row = vextq_u8(row, above_right, 1);
  }
  // Last row: store only, no further shift is needed.
  vst1q_u8(dst + i * stride, row);
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
|
||||
|
|
|
@ -138,7 +138,7 @@ add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
|
|||
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
|
||||
|
||||
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
|
||||
specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
|
||||
|
||||
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
|
||||
|
|
Загрузка…
Ссылка в новой задаче