Merge "Code clean of highbd_tm_predictor_32x32"
This commit is contained in:
Коммит
b8c2a4eb0c
|
@@ -132,7 +132,6 @@ using std::tr1::make_tuple;
|
|||
#if HAVE_SSE2
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#if CONFIG_USE_X86INC
|
||||
#if ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
|
@@ -159,34 +158,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
|||
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_predictor_32x32_c, 32, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
|
||||
&vpx_highbd_v_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
|
||||
&vpx_highbd_v_predictor_8x8_c, 8, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
|
||||
#endif // !ARCH_X86_64
|
||||
|
||||
#if ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
|
@@ -219,37 +191,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
|||
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_predictor_32x32_c, 32,
|
||||
10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||
&vpx_highbd_tm_predictor_16x16_c, 16,
|
||||
10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_predictor_16x16_c, 16,
|
||||
10),
|
||||
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
|
||||
&vpx_highbd_v_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
|
||||
&vpx_highbd_v_predictor_8x8_c, 8, 10),
|
||||
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 10),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
|
||||
#endif // !ARCH_X86_64
|
||||
|
||||
#if ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
|
@@ -282,35 +224,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
|||
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||
&vpx_highbd_dc_predictor_32x32_c, 32,
|
||||
12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||
&vpx_highbd_tm_predictor_16x16_c, 16,
|
||||
12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
|
||||
&vpx_highbd_dc_predictor_16x16_c, 16,
|
||||
12),
|
||||
make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
|
||||
&vpx_highbd_v_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
|
||||
&vpx_highbd_v_predictor_8x8_c, 8, 12),
|
||||
make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 12),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
|
||||
#endif // !ARCH_X86_64
|
||||
|
||||
#endif // CONFIG_USE_X86INC
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_SSE2
|
||||
|
|
|
@@ -435,7 +435,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
|
||||
|
||||
add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
|
||||
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
|
||||
|
|
|
@@ -385,9 +385,8 @@ cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
|
|||
jnz .loop
|
||||
REP_RET
|
||||
|
||||
%if ARCH_X86_64
|
||||
INIT_XMM sse2
|
||||
cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
|
||||
cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps
|
||||
movd m0, [aboveq-2]
|
||||
mova m1, [aboveq]
|
||||
mova m2, [aboveq+16]
|
||||
|
@@ -395,70 +394,60 @@ cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
|
|||
mova m4, [aboveq+48]
|
||||
pshuflw m0, m0, 0x0
|
||||
; Get the values to compute the maximum value at this bit depth
|
||||
mov oned, 1
|
||||
pxor m10, m10
|
||||
pxor m11, m11
|
||||
pinsrw m10, oned, 0
|
||||
pinsrw m11, bpsd, 0
|
||||
pshuflw m10, m10, 0x0
|
||||
DEFINE_ARGS dst, stride, line, left
|
||||
punpcklqdq m10, m10
|
||||
mov lineq, -16
|
||||
mova m5, m10
|
||||
pcmpeqw m5, m5
|
||||
movd m6, bpsd
|
||||
psllw m5, m6
|
||||
pcmpeqw m7, m7
|
||||
pxor m6, m6 ; min possible value
|
||||
pxor m5, m7 ; max possible value
|
||||
punpcklqdq m0, m0
|
||||
psllw m10, m11
|
||||
add leftq, 64
|
||||
psubw m10, m5 ; max possible value
|
||||
pxor m11, m11 ; min possible value
|
||||
DEFINE_ARGS dst, stride, line, left
|
||||
mov lineq, -16
|
||||
psubw m1, m0
|
||||
psubw m2, m0
|
||||
psubw m3, m0
|
||||
psubw m4, m0
|
||||
.loop:
|
||||
movd m5, [leftq+lineq*4]
|
||||
movd m6, [leftq+lineq*4+2]
|
||||
pshuflw m5, m5, 0x0
|
||||
pshuflw m6, m6, 0x0
|
||||
punpcklqdq m5, m5
|
||||
punpcklqdq m6, m6
|
||||
paddw m7, m5, m1
|
||||
paddw m8, m5, m2
|
||||
paddw m9, m5, m3
|
||||
paddw m5, m4
|
||||
;Clamp these values to the bit-depth
|
||||
pminsw m7, m10
|
||||
pminsw m8, m10
|
||||
pminsw m9, m10
|
||||
pminsw m5, m10
|
||||
pmaxsw m7, m11
|
||||
pmaxsw m8, m11
|
||||
pmaxsw m9, m11
|
||||
pmaxsw m5, m11
|
||||
;Store these values
|
||||
mova [dstq ], m7
|
||||
mova [dstq +16], m8
|
||||
mova [dstq +32], m9
|
||||
mova [dstq +48], m5
|
||||
paddw m7, m6, m1
|
||||
paddw m8, m6, m2
|
||||
paddw m9, m6, m3
|
||||
paddw m6, m4
|
||||
;Clamp these values to the bit-depth
|
||||
pminsw m7, m10
|
||||
pminsw m8, m10
|
||||
pminsw m9, m10
|
||||
pminsw m6, m10
|
||||
pmaxsw m7, m11
|
||||
pmaxsw m8, m11
|
||||
pmaxsw m9, m11
|
||||
pmaxsw m6, m11
|
||||
;Store these values
|
||||
mova [dstq+strideq*2 ], m7
|
||||
mova [dstq+strideq*2+16], m8
|
||||
mova [dstq+strideq*2+32], m9
|
||||
mova [dstq+strideq*2+48], m6
|
||||
movd m7, [leftq]
|
||||
pshuflw m7, m7, 0x0
|
||||
punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1
|
||||
paddw m0, m7, m1
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq ], m0
|
||||
paddw m0, m7, m2
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq +16], m0
|
||||
paddw m0, m7, m3
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq +32], m0
|
||||
paddw m0, m7, m4
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq +48], m0
|
||||
movd m7, [leftq+2]
|
||||
pshuflw m7, m7, 0x0
|
||||
punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2
|
||||
paddw m0, m7, m1
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq+strideq*2 ], m0
|
||||
paddw m0, m7, m2
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq+strideq*2+16], m0
|
||||
paddw m0, m7, m3
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq+strideq*2+32], m0
|
||||
paddw m0, m7, m4
|
||||
pminsw m0, m5
|
||||
pmaxsw m0, m6
|
||||
mova [dstq+strideq*2+48], m0
|
||||
lea dstq, [dstq+strideq*4]
|
||||
lea leftq, [leftq+4]
|
||||
inc lineq
|
||||
jnz .loop
|
||||
REP_RET
|
||||
%endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче