Merge "Fix x86inc.asm to build PIC code correctly"

This commit is contained in:
Yunqing Wang 2013-09-18 14:51:31 -07:00 коммит произвёл Gerrit Code Review
Родитель bb30fff978 9d901217c6
Коммит a7b7f94ae8
3 изменённых файлов: 140 добавлений и 38 удалений

92
third_party/x86inc/x86inc.asm поставляемый
Просмотреть файл

@ -97,21 +97,91 @@
%endif
%endmacro
%if WIN64
%define PIC
%elifidn __OUTPUT_FORMAT__,macho64
%define PIC
%elif ARCH_X86_64 == 0
; x86_32 doesn't require PIC.
; Some distros prefer shared objects to be PIC, but nothing breaks if
; the code contains a few textrels, so we'll skip that complexity.
%undef PIC
%elif CONFIG_PIC
%define PIC
; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
; logic from the original x86inc code is kept, but only for 64-bit targets.
;
; This section provides four names for code that references global data:
;   GET_GOT reg   - set up whatever base register the target needs
;   GLOBAL(sym)   - wrap a symbol so that it is addressed correctly
;   RESTORE_GOT   - undo the register save performed by GET_GOT
;   WRT_PLT       - suffix for symbol references that must go through the PLT
; On targets that need none of this, the names expand to nothing (or to the
; bare symbol) via the fallback definitions near the end of the section.

; Step 1: classify the output format. elf32, macho32, win32 and aout are
; treated as 32-bit ABIs; every other format is treated as not 32-bit.
%ifidn __OUTPUT_FORMAT__,elf32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,aout
%define ABI_IS_32BIT 1
%else
%define ABI_IS_32BIT 0
%endif
; Step 2a: 32-bit ABIs. A real GET_GOT is only defined when CONFIG_PIC is 1
; and the format is elf32 or macho32; win32 and aout fall through to the
; no-op fallback further down.
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
; NOTE(review): GET_GOT_SAVE_ARG is defined here but not consumed anywhere in
; this section; presumably it is read by code outside this view.
%define GET_GOT_SAVE_ARG 1
%define WRT_PLT wrt ..plt
; GET_GOT reg (elf32)
;   Pushes the current value of reg, then uses a call/ret pair to obtain the
;   address of the sub_offset label: the call pushes that address as its
;   return address, the get_got stub copies it from [esp] into reg and adds
;   the "_GLOBAL_OFFSET_TABLE_ + $$ - label wrt ..gotpc" expression, and ret
;   resumes at sub_offset, which jumps over the stub to exitGG.
;   NOTE(review): per the NASM manual's ELF PIC section this expression form
;   turns the label address into the GOT address - confirm against the manual.
;   Each expansion also redefines GLOBAL(x) as a ..gotoff reference relative
;   to reg, and RESTORE_GOT as the pop that matches the initial push. The
;   saved value therefore stays on the stack until RESTORE_GOT is expanded.
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
push %1
call %%get_got
%%sub_offset:
jmp %%exitGG
%%get_got:
mov %1, [esp]
add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
ret
%%exitGG:
%undef GLOBAL
%define GLOBAL(x) x + %1 wrt ..gotoff
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
%define GET_GOT_SAVE_ARG 1
; GET_GOT reg (macho32)
;   Pushes the current value of reg, then calls the very next instruction and
;   pops the return address, leaving the run-time address of the get_got
;   label in reg. GLOBAL(x) is redefined as x plus reg minus that label, and
;   RESTORE_GOT as the pop that matches the initial push.
;   WRT_PLT is not set in this branch; it receives the empty fallback below.
%macro GET_GOT 1
push %1
call %%get_got
%%get_got:
pop %1
%undef GLOBAL
%define GLOBAL(x) x + %1 - %%get_got
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
%endif
%endif
; When not building for x86_64, make sure PIC is not defined, so that the
; "default rel" directive at the end of this section is not emitted.
%if ARCH_X86_64 == 0
%undef PIC
%endif
%else
; Step 2b: every other output format. GET_GOT expands to nothing and
; GLOBAL(x) becomes "rel x".
%macro GET_GOT 1
%endmacro
%define GLOBAL(x) rel x
%define WRT_PLT wrt ..plt
; PIC is defined for WIN64, for macho64 output, or when CONFIG_PIC is nonzero.
%if WIN64
%define PIC
%elifidn __OUTPUT_FORMAT__,macho64
%define PIC
%elif CONFIG_PIC
%define PIC
%endif
%endif
; Step 3: fallbacks. If no GET_GOT macro was defined above, define an empty
; one and make GLOBAL(x) the bare symbol. In the 32-bit PIC configurations
; GET_GOT is already defined, so this block is skipped there and GLOBAL only
; becomes defined once GET_GOT has been expanded.
%ifnmacro GET_GOT
%macro GET_GOT 1
%endmacro
%define GLOBAL(x) x
%endif
; RESTORE_GOT and WRT_PLT default to empty so they can be used unconditionally.
%ifndef RESTORE_GOT
%define RESTORE_GOT
%endif
%ifndef WRT_PLT
%define WRT_PLT
%endif
; Switch NASM to RIP-relative addressing by default when PIC is defined.
%ifdef PIC
default rel
%endif
; Done with PIC macros
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
%ifndef __NASM_VER__

Просмотреть файл

@ -19,12 +19,14 @@ pw_32: times 8 dw 32
SECTION .text
INIT_MMX sse
cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
movd m0, [aboveq]
punpckldq m0, [leftq]
psadbw m0, m1
paddw m0, [pw_4]
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
pshufw m0, m0, 0x0
packuswb m0, m0
@ -33,10 +35,14 @@ cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
lea dstq, [dstq+strideq*2]
movd [dstq ], m0
movd [dstq+strideq], m0
RESTORE_GOT
RET
INIT_MMX sse
cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
movq m0, [aboveq]
movq m2, [leftq]
@ -45,7 +51,7 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
psadbw m0, m1
psadbw m2, m1
paddw m0, m2
paddw m0, [pw_8]
paddw m0, [GLOBAL(pw_8)]
psraw m0, 4
pshufw m0, m0, 0x0
packuswb m0, m0
@ -58,10 +64,14 @@ cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
movq [dstq+stride3q ], m0
RESTORE_GOT
RET
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
mova m0, [aboveq]
mova m2, [leftq]
@ -73,7 +83,7 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [pw_16]
paddw m0, [GLOBAL(pw_16)]
psraw m0, 5
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
@ -86,10 +96,14 @@ cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
RESTORE_GOT
REP_RET
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
mova m0, [aboveq]
mova m2, [aboveq+16]
@ -107,7 +121,7 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
paddw m0, m4
movhlps m2, m0
paddw m0, m2
paddw m0, [pw_32]
paddw m0, [GLOBAL(pw_32)]
psraw m0, 6
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
@ -124,6 +138,8 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
RESTORE_GOT
REP_RET
INIT_MMX sse

Просмотреть файл

@ -112,14 +112,16 @@ cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
REP_RET
INIT_MMX ssse3
cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
GET_GOT goffsetq
movq m0, [aboveq]
pshufb m2, m0, [sh_b23456777]
pshufb m1, m0, [sh_b01234577]
pshufb m0, [sh_b12345677]
pshufb m2, m0, [GLOBAL(sh_b23456777)]
pshufb m1, m0, [GLOBAL(sh_b01234577)]
pshufb m0, [GLOBAL(sh_b12345677)]
pavgb m3, m2, m1
pxor m2, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@ -132,19 +134,23 @@ cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
movd [dstq ], m0
psrlq m0, 8
movd [dstq+strideq], m0
RESTORE_GOT
RET
INIT_MMX ssse3
cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
GET_GOT goffsetq
movq m0, [aboveq]
mova m1, [sh_b12345677]
DEFINE_ARGS dst, stride, stride3, line
mova m1, [GLOBAL(sh_b12345677)]
DEFINE_ARGS dst, stride, stride3
lea stride3q, [strideq*3]
pshufb m2, m0, [sh_b23456777]
pshufb m2, m0, [GLOBAL(sh_b23456777)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@ -167,20 +173,24 @@ cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
movq [dstq+strideq*2], m0
pshufb m0, m1
movq [dstq+stride3q ], m0
RESTORE_GOT
RET
INIT_XMM ssse3
cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset
GET_GOT goffsetq
mova m0, [aboveq]
DEFINE_ARGS dst, stride, stride3, dst8, line
lea stride3q, [strideq*3]
lea dst8q, [dstq+strideq*8]
mova m1, [sh_b123456789abcdeff]
pshufb m2, m0, [sh_b23456789abcdefff]
mova m1, [GLOBAL(sh_b123456789abcdeff)]
pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@ -214,29 +224,33 @@ cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
movhps [dstq+strideq +8], m0
movhps [dstq+strideq*2+8], m0
movhps [dstq+stride3q +8], m0
RESTORE_GOT
RET
INIT_XMM ssse3
cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset
GET_GOT goffsetq
mova m0, [aboveq]
mova m4, [aboveq+16]
DEFINE_ARGS dst, stride, stride3, dst16, line
lea stride3q, [strideq*3]
lea dst16q, [dstq +strideq*8]
lea dst16q, [dst16q+strideq*8]
mova m1, [sh_b123456789abcdeff]
pshufb m2, m4, [sh_b23456789abcdefff]
mova m1, [GLOBAL(sh_b123456789abcdeff)]
pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m4
pxor m2, m4
palignr m5, m4, m0, 1
palignr m6, m4, m0, 2
pshufb m4, m1
pand m2, [pb_1]
pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m4, m3
pavgb m3, m0, m6
pxor m0, m6
pand m0, [pb_1]
pand m0, [GLOBAL(pb_1)]
psubb m3, m0
pavgb m5, m3
@ -288,4 +302,6 @@ cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
mova [dstq +strideq +16], m4
mova [dstq +strideq*2+16], m4
mova [dstq +stride3q +16], m4
RESTORE_GOT
RET