Merge "mips msa vp9 idct 32x32 optimization"
This commit is contained in:
Коммит
a8a9c2bb45
|
@ -380,4 +380,12 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(&vp9_fdct32x32_rd_avx2,
|
||||
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
|
||||
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
// Registers the "MSA" instantiation of Trans32x32Test. The single parameter
// tuple pairs vp9_fdct32x32_c with vp9_idct32x32_1024_add_msa at VPX_BITS_8.
// Compiled only when HAVE_MSA is set and both CONFIG_VP9_HIGHBITDEPTH and
// CONFIG_EMULATE_HARDWARE are off.
// NOTE(review): the meaning of the third tuple value (0) is defined by the
// test fixture, which is outside this view — confirm before changing it.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct32x32_c,
|
||||
&vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
|
|
@ -309,14 +309,18 @@ INSTANTIATE_TEST_CASE_P(
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, PartialIDctTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct32x32_c,
|
||||
&vp9_idct32x32_1024_add_c,
|
||||
&vp9_idct32x32_34_add_msa,
|
||||
TX_32X32, 34),
|
||||
make_tuple(&vp9_fdct32x32_c,
|
||||
&vp9_idct32x32_1024_add_c,
|
||||
&vp9_idct32x32_1_add_msa,
|
||||
TX_32X32, 1),
|
||||
make_tuple(&vp9_fdct16x16_c,
|
||||
&vp9_idct16x16_256_add_c,
|
||||
&vp9_idct16x16_10_add_msa,
|
||||
TX_16X16, 10),
|
||||
make_tuple(&vp9_fdct16x16_c,
|
||||
&vp9_idct16x16_256_add_msa,
|
||||
&vp9_idct16x16_10_add_c,
|
||||
TX_16X16, 10),
|
||||
make_tuple(&vp9_fdct16x16_c,
|
||||
&vp9_idct16x16_256_add_c,
|
||||
&vp9_idct16x16_1_add_msa,
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -358,6 +358,14 @@
|
|||
src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \
|
||||
}
|
||||
|
||||
#define STORE_4VECS_SH(ptr, stride, \
|
||||
in0, in1, in2, in3) { \
|
||||
STORE_SH(in0, ((ptr) + 0 * stride)); \
|
||||
STORE_SH(in1, ((ptr) + 1 * stride)); \
|
||||
STORE_SH(in2, ((ptr) + 2 * stride)); \
|
||||
STORE_SH(in3, ((ptr) + 3 * stride)); \
|
||||
}
|
||||
|
||||
#define STORE_8VECS_SH(ptr, stride, \
|
||||
in0, in1, in2, in3, \
|
||||
in4, in5, in6, in7) { \
|
||||
|
|
|
@ -443,15 +443,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
|
||||
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
|
||||
#is this a typo?
|
||||
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
|
||||
|
|
|
@ -138,6 +138,7 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
|
|||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
|
||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
|
||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
|
||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
|
||||
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
|
||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
|
||||
|
|
Загрузка…
Ссылка в новой задаче