diff --git a/vp9/common/x86/vp9_iwalsh_mmx.asm b/vp9/common/x86/vp9_iwalsh_mmx.asm deleted file mode 100644 index 1af252168..000000000 --- a/vp9/common/x86/vp9_iwalsh_mmx.asm +++ /dev/null @@ -1,173 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_short_inv_walsh4x4_1_mmx(short *input, short *output) -global sym(vp9_short_inv_walsh4x4_1_mmx) PRIVATE -sym(vp9_short_inv_walsh4x4_1_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) - mov rax, 3 - - mov rdi, arg(1) - add rax, [rsi] ;input[0] + 3 - - movd mm0, eax - - punpcklwd mm0, mm0 ;x x val val - - punpckldq mm0, mm0 ;val val val val - - psraw mm0, 3 ;(input[0] + 3) >> 3 - - movq [rdi + 0], mm0 - movq [rdi + 8], mm0 - movq [rdi + 16], mm0 - movq [rdi + 24], mm0 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp9_short_inv_walsh4x4_mmx(short *input, short *output) -global sym(vp9_short_inv_walsh4x4_mmx) PRIVATE -sym(vp9_short_inv_walsh4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - mov rax, 3 - mov rsi, arg(0) - mov rdi, arg(1) - shl rax, 16 - - movq mm0, [rsi + 0] ;ip[0] - movq mm1, [rsi + 8] ;ip[4] - or rax, 3 ;00030003h - - movq mm2, [rsi + 16] ;ip[8] - movq mm3, [rsi + 24] ;ip[12] - - movq mm7, rax - movq mm4, mm0 - - punpcklwd mm7, mm7 ;0003000300030003h - movq mm5, mm1 - - paddw mm4, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm4 ;temp al - - paddw mm4, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm1, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - - paddw mm0, mm1 ;dl + cl - psubw mm5, mm1 ;dl - cl - - ; 03 02 01 00 - ; 13 12 11 10 - ; 23 22 21 20 - ; 33 32 31 30 - - movq mm3, mm4 ; 03 02 01 00 - punpcklwd mm4, mm0 ; 11 01 10 00 - punpckhwd mm3, mm0 ; 13 03 12 02 - - movq mm1, mm6 ; 23 22 21 20 - punpcklwd mm6, mm5 ; 31 21 30 20 - punpckhwd mm1, mm5 ; 33 23 32 22 - - movq mm0, mm4 ; 11 01 10 00 - movq mm2, mm3 ; 13 03 12 02 - - punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] - punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4] - - punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8] - punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12] -;~~~~~~~~~~~~~~~~~~~~~ - movq mm1, mm0 - movq mm5, mm4 - - paddw mm1, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm1 ;temp al - - paddw mm1, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm4, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - - paddw mm0, mm4 ;dl + cl - psubw mm5, mm4 ;dl - cl -;~~~~~~~~~~~~~~~~~~~~~ - movq mm3, mm1 ; 03 02 01 00 - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm3, mm0 ; 13 03 12 02 - - movq mm4, mm6 ; 23 22 21 20 - punpcklwd mm6, mm5 ; 31 21 30 20 - punpckhwd mm4, mm5 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm2, mm3 ; 13 03 12 02 - - punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] - punpckhdq mm1, mm6 ; 31 21 11 01 aka ip[4] - - punpckldq mm2, mm4 ; 32 22 12 02 aka ip[8] - punpckhdq mm3, mm4 ; 33 23 13 03 aka ip[12] - - paddw mm0, mm7 - paddw mm1, mm7 - paddw mm2, mm7 - paddw mm3, mm7 - - psraw mm0, 3 - psraw mm1, 3 - psraw mm2, 3 - psraw mm3, 3 - - movq [rdi + 0], mm0 - movq [rdi + 8], mm1 - movq [rdi + 16], mm2 - movq [rdi + 24], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - diff --git a/vp9/common/x86/vp9_iwalsh_sse2.asm b/vp9/common/x86/vp9_iwalsh_sse2.asm deleted file mode 100644 index 84fa2fe2a..000000000 --- a/vp9/common/x86/vp9_iwalsh_sse2.asm +++ /dev/null @@ -1,119 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp9_short_inv_walsh4x4_sse2(short *input, short *output) -global sym(vp9_short_inv_walsh4x4_sse2) PRIVATE -sym(vp9_short_inv_walsh4x4_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - SAVE_XMM 6 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) - mov rdi, arg(1) - mov rax, 3 - - movdqa xmm0, [rsi + 0] ;ip[4] ip[0] - movdqa xmm1, [rsi + 16] ;ip[12] ip[8] - - shl rax, 16 - or rax, 3 ;00030003h - - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm0 ;ip[4] ip[0] - - paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm4, xmm0 - punpcklqdq xmm0, xmm3 ;d1 a1 - punpckhqdq xmm4, xmm3 ;c1 b1 - movd xmm6, eax - - movdqa xmm1, xmm4 ;c1 b1 - paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - -;;;temp output -;; movdqu [rdi + 0], xmm4 -;; movdqu [rdi + 16], xmm3 - -;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm4 ;ip[4] ip[0] - - pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03 - - paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm5, xmm4 - punpcklqdq xmm4, xmm3 ;d1 a1 - punpckhqdq xmm5, xmm3 ;c1 b1 - - movdqa xmm1, xmm5 ;c1 b1 - paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] -;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02 -;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - paddw xmm5, xmm6 - paddw xmm1, xmm6 - - psraw xmm5, 3 - psraw xmm1, 3 - - movdqa [rdi + 0], xmm5 - movdqa [rdi + 16], xmm1 - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index ea4107730..1079e2048 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -75,10 +75,8 @@ VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c -VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm