From 63ea8705eb0b4609b1c87968817d18421f051641 Mon Sep 17 00:00:00 2001 From: Makoto Kato Date: Fri, 11 Jun 2010 18:32:28 +0900 Subject: [PATCH] some XMM registers are non-volatile on windows x64 ABI XMM6 to XMM15 are non-volatile on Windows x64 ABI. We have to save these registers. Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076 --- vp8/common/x86/iwalsh_sse2.asm | 2 ++ vp8/common/x86/loopfilter_sse2.asm | 12 ++++++++++++ vp8/common/x86/postproc_sse2.asm | 6 ++++++ vp8/common/x86/recon_sse2.asm | 2 ++ vp8/common/x86/subpixel_sse2.asm | 12 ++++++++++++ vpx_ports/x86_abi_support.asm | 19 +++++++++++++++++++ 6 files changed, 53 insertions(+) diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm index cb61691fd..bb0d1d7ae 100644 --- a/vp8/common/x86/iwalsh_sse2.asm +++ b/vp8/common/x86/iwalsh_sse2.asm @@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 + SAVE_XMM push rsi push rdi ; end prolog @@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2): ; begin epilog pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 1c0a3881c..d160dd65a 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -26,6 +26,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -212,6 +213,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -231,6 +233,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -652,6 +655,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -671,6 +675,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi 
push rdi @@ -1002,6 +1007,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1021,6 +1027,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -1564,6 +1571,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1583,6 +1591,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -1679,6 +1688,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1698,6 +1708,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx ; save callee-saved reg push rsi push rdi @@ -1942,6 +1953,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 5097b2a30..9e56429e3 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -240,6 +241,7 @@ acrossnextcol: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -439,6 +442,7 @@ loop_row: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -612,6 +617,7 @@ nextcol4: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM 
UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index 2ce028cdb..cfdbfada9 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 + SAVE_XMM push rsi push rdi ; end prolog @@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2): ; begin epilog pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index cc2837b8d..b71a2f9d1 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -397,6 +402,7 @@ vp8_filter_block1d8_v6_sse2_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -510,6 +516,7 @@ vp8_filter_block1d16_v6_sse2_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -641,6 +648,7 @@ sym(vp8_filter_block1d16_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -876,6 +884,7 @@ vp8_filter_block1d8_v6_only_sse2_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -894,6 +903,7 @@ sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -933,6 +943,7 @@ 
unpack_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -953,6 +964,7 @@ sym(vp8_bilinear_predict16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 6fdbf8add..7840e3594 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -215,6 +215,25 @@ %define UNSHADOW_ARGS mov rsp, rbp %endif +; Win64 ABI: XMM6:XMM15 are non-volatile and must be preserved (libvpx uses XMM6 and XMM7, so those two are saved) +; rsp has to be 16-byte aligned where SAVE_XMM/RESTORE_XMM expand (movdqa needs an aligned address), and RESTORE_XMM expects rsp to hold the value SAVE_XMM left +%ifidn __OUTPUT_FORMAT__,x64 +%macro SAVE_XMM 0 + sub rsp, 32 + movdqa XMMWORD PTR [rsp], xmm6 + movdqa XMMWORD PTR [rsp+16], xmm7 +%endmacro +%macro RESTORE_XMM 0 + movdqa xmm6, XMMWORD PTR [rsp] + movdqa xmm7, XMMWORD PTR [rsp+16] + add rsp, 32 +%endmacro +%else +%macro SAVE_XMM 0 +%endmacro +%macro RESTORE_XMM 0 +%endmacro +%endif ; Name of the rodata section ;