some XMM registers are non-volatile on windows x64 ABI

XMM6 to XMM15 are non-volatile on Windows x64 ABI.  We have to save
these registers.

Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076
This commit is contained in:
Makoto Kato 2010-06-11 18:32:28 +09:00 коммит произвёл John Koleszar
Родитель 8389f1967c
Коммит 63ea8705eb
6 изменённых файлов: 53 добавлений и 0 удалений

Просмотреть файл

@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
SAVE_XMM
push rsi
push rdi
; end prolog
@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -26,6 +26,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -212,6 +213,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -231,6 +233,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -652,6 +655,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -671,6 +675,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -1002,6 +1007,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -1021,6 +1027,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -1564,6 +1571,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -1583,6 +1591,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -1679,6 +1688,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -1698,6 +1708,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
@ -1942,6 +1953,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -240,6 +241,7 @@ acrossnextcol:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -439,6 +442,7 @@ loop_row:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -612,6 +617,7 @@ nextcol4:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM
push rsi
push rdi
; end prolog
@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -397,6 +402,7 @@ vp8_filter_block1d8_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -510,6 +516,7 @@ vp8_filter_block1d16_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -641,6 +648,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -876,6 +884,7 @@ vp8_filter_block1d8_v6_only_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -894,6 +903,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -933,6 +943,7 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -953,6 +964,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi

Просмотреть файл

@ -215,6 +215,25 @@
%define UNSHADOW_ARGS mov rsp, rbp
%endif
; must keep XMM6:XMM15 (libvpx uses XMM6 and XMM7) on Win64 ABI
; rsp register has to be aligned
%ifidn __OUTPUT_FORMAT__,x64
%macro SAVE_XMM 0
sub rsp, 32
movdqa XMMWORD PTR [rsp], xmm6
movdqa XMMWORD PTR [rsp+16], xmm7
%endmacro
%macro RESTORE_XMM 0
movdqa xmm6, XMMWORD PTR [rsp]
movdqa xmm7, XMMWORD PTR [rsp+16]
add rsp, 32
%endmacro
%else
%macro SAVE_XMM 0
%endmacro
%macro RESTORE_XMM 0
%endmacro
%endif
; Name of the rodata section
;