diff --git a/xpcom/reflect/xptcall/md/unix/moz.build b/xpcom/reflect/xptcall/md/unix/moz.build index c8576dfa93d3..019487bd69a2 100644 --- a/xpcom/reflect/xptcall/md/unix/moz.build +++ b/xpcom/reflect/xptcall/md/unix/moz.build @@ -14,6 +14,10 @@ if CONFIG['OS_ARCH'] == 'Darwin': '!xptcstubs_asm_ppc_darwin.s', 'xptcinvoke_asm_ppc_rhapsody.s', ] + if CONFIG['OS_TEST'] == 'x86_64': + SOURCES += [ + 'xptcinvoke_asm_x86_64_unix.S', + ] if '86' in CONFIG['OS_TEST'] and CONFIG['OS_TEST'] != 'x86_64': DEFINES['MOZ_NEED_LEADING_UNDERSCORE'] = True @@ -28,6 +32,7 @@ if CONFIG['OS_ARCH'] in ('Linux', 'Bitrig', 'DragonFly', 'FreeBSD', 'NetBSD', 'O CONFIG['OS_ARCH'].startswith('GNU_'): if CONFIG['OS_TEST'] == 'x86_64': SOURCES += [ + 'xptcinvoke_asm_x86_64_unix.S', 'xptcinvoke_x86_64_unix.cpp', 'xptcstubs_x86_64_linux.cpp', ] @@ -53,6 +58,7 @@ if CONFIG['OS_ARCH'] == 'SunOS' and '86' in CONFIG['OS_TEST']: if CONFIG['OS_TEST'] == 'x86_64': if CONFIG['GNU_CC']: SOURCES += [ + 'xptcinvoke_asm_x86_64_unix.S', 'xptcinvoke_x86_64_unix.cpp', 'xptcstubs_x86_64_linux.cpp' ] diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_x86_64_unix.S b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_x86_64_unix.S new file mode 100644 index 000000000000..83a24a305e5e --- /dev/null +++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_x86_64_unix.S @@ -0,0 +1,102 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Darwin gives a leading '_' to symbols defined in C code. 
+#ifdef XP_DARWIN +#define SYM(x) _ ## x +#else +#define SYM(x) x +#endif + +.intel_syntax noprefix + +# nsresult NS_InvokeByIndex(nsISupports* this, uint32_t aVtableIndex, +# uint32_t argc, nsXPTCVariant* argv); +.text +.global SYM(NS_InvokeByIndex) +#ifndef XP_DARWIN +.type NS_InvokeByIndex, @function +#endif +.align 4 +SYM(NS_InvokeByIndex): + push rbp + mov rbp, rsp + +# save r12 and r13 because we use them and they are callee saved. + push r12 + push r13 + +# save this and the vtable index because we need them after setting up the +# stack. + mov r12, rdi + mov r13, rsi + +# allocate space for stack arguments. In theory we only need 8 * (argc - 5) +# bytes because at least 5 arguments will go in registers, but for now it is +# just simpler to allocate 8 * argc bytes. Note that we treat the this +# pointer specially. + lea eax, [edx * 8] + sub rsp, rax + +# If there is an odd number of args the stack can be misaligned so realign it. + and rsp, 0xfffffffffffffff0 + +# pass the stack slot area to InvokeCopyToStack. + mov r8, rsp + +# set up space for the register slots: there are 5 integer ones and 8 floating +# point ones. So we need 104 bytes of space, but we allocate 112 to keep rsp +# aligned to 16 bytes. + sub rsp, 112 + +# the first argument to InvokeCopyToStack is the integer register area, and the +# second is the floating point area. + mov rdi, rsp + lea rsi, [rsp + 40] + +# The 3rd and 4th arguments to InvokeCopyToStack are already in the right +# registers. So now we can just call InvokeCopyToStack. + call SYM(InvokeCopyToStack) + +# set up |this| (saved in r12) as the first argument + mov rdi, r12 + +# copy the integer arguments into place. 
+ mov rsi, [rsp] + mov rdx, [rsp + 8] + mov rcx, [rsp + 16] + mov r8, [rsp + 24] + mov r9, [rsp + 32] + +# copy the float arguments into place + movsd xmm0, [rsp + 40] + movsd xmm1, [rsp + 48] + movsd xmm2, [rsp + 56] + movsd xmm3, [rsp + 64] + movsd xmm4, [rsp + 72] + movsd xmm5, [rsp + 80] + movsd xmm6, [rsp + 88] + movsd xmm7, [rsp + 96] + +# get rid of the scratch space for registers + add rsp, 112 + +# load the function pointer and call + lea eax, [r13d * 8] + add rax, [rdi] + call [rax] + +# r12 and r13 were pushed relative to the old stack pointer which is now the +# frame pointer. + mov r12, [rbp - 0x8] + mov r13, [rbp - 0x10] + + mov rsp, rbp + pop rbp + ret + +#ifndef XP_DARWIN +// Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits ; .previous +#endif diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp index 08e51988971c..a9db2a693a58 100644 --- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp +++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_x86_64_unix.cpp @@ -8,46 +8,19 @@ #include "xptcprivate.h" -// 6 integral parameters are passed in registers -const uint32_t GPR_COUNT = 6; +// 6 integral parameters are passed in registers, but 1 is |this| which isn't +// considered here. +const uint32_t GPR_COUNT = 5; // 8 floating point parameters are passed in SSE registers const uint32_t FPR_COUNT = 8; -// Remember that these 'words' are 64-bit long -static inline void -invoke_count_words(uint32_t paramCount, nsXPTCVariant * s, - uint32_t & nr_stack) +extern "C" void +InvokeCopyToStack(uint64_t * gpregs, double * fpregs, + uint32_t paramCount, nsXPTCVariant * s, + uint64_t* d) { - uint32_t nr_gpr; - uint32_t nr_fpr; - nr_gpr = 1; // skip one GP register for 'that' - nr_fpr = 0; - nr_stack = 0; - - /* Compute number of eightbytes of class MEMORY. 
*/ - for (uint32_t i = 0; i < paramCount; i++, s++) { - if (!s->IsPtrData() - && (s->type == nsXPTType::T_FLOAT || s->type == nsXPTType::T_DOUBLE)) { - if (nr_fpr < FPR_COUNT) - nr_fpr++; - else - nr_stack++; - } - else { - if (nr_gpr < GPR_COUNT) - nr_gpr++; - else - nr_stack++; - } - } -} - -static void -invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s, - uint64_t * gpregs, double * fpregs) -{ - uint32_t nr_gpr = 1u; // skip one GP register for 'that' + uint32_t nr_gpr = 0u; // |this| is loaded by the assembly caller, so no GP register is skipped here uint32_t nr_fpr = 0u; uint64_t value = 0u; @@ -100,89 +73,3 @@ invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s, } } } - -// Disable avx for the next function to allow compilation with -// -march=native on new machines, or similar hardcoded -march options. -// Having avx enabled appears to change the alignment behavior of alloca -// (apparently adding an extra 16 bytes) of padding/alignment (and using -// 32-byte alignment instead of 16-byte). This seems to be the best -// available workaround, given that this code, which should perhaps -// better be written in assembly, is written in C++. -#ifndef __clang__ -#pragma GCC push_options -#pragma GCC target ("no-avx") -#endif - -// Avoid AddressSanitizer instrumentation for the next function because it -// depends on __builtin_alloca behavior and alignment that cannot be relied on -// once the function is compiled with a version of ASan that has dynamic-alloca -// instrumentation enabled. 
- -MOZ_ASAN_BLACKLIST -EXPORT_XPCOM_API(nsresult) -NS_InvokeByIndex(nsISupports * that, uint32_t methodIndex, - uint32_t paramCount, nsXPTCVariant * params) -{ - uint32_t nr_stack; - invoke_count_words(paramCount, params, nr_stack); - - // Stack, if used, must be 16-bytes aligned - if (nr_stack) - nr_stack = (nr_stack + 1) & ~1; - - // Load parameters to stack, if necessary - uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8); - uint64_t gpregs[GPR_COUNT]; - double fpregs[FPR_COUNT]; - invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs); - - // We used to have switches to make sure we would only load the registers - // that are needed for this call. That produced larger code that was - // not faster in practice. It also caused compiler warnings about the - // variables being used uninitialized. - // We now just load every every register. There could still be a warning - // from a memory analysis tools that we are loading uninitialized stack - // positions. - - // FIXME: this function depends on the above __builtin_alloca placing - // the array in the correct spot for the ABI. 
- - // Load FPR registers from fpregs[] - double d0, d1, d2, d3, d4, d5, d6, d7; - - d7 = fpregs[7]; - d6 = fpregs[6]; - d5 = fpregs[5]; - d4 = fpregs[4]; - d3 = fpregs[3]; - d2 = fpregs[2]; - d1 = fpregs[1]; - d0 = fpregs[0]; - - // Load GPR registers from gpregs[] - uint64_t a0, a1, a2, a3, a4, a5; - - a5 = gpregs[5]; - a4 = gpregs[4]; - a3 = gpregs[3]; - a2 = gpregs[2]; - a1 = gpregs[1]; - a0 = (uint64_t) that; - - // Get pointer to method - uint64_t methodAddress = *((uint64_t *)that); - methodAddress += 8 * methodIndex; - methodAddress = *((uint64_t *)methodAddress); - - typedef nsresult (*Method)(uint64_t, uint64_t, uint64_t, uint64_t, - uint64_t, uint64_t, double, double, double, - double, double, double, double, double); - nsresult result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5, - d0, d1, d2, d3, d4, d5, - d6, d7); - return result; -} - -#ifndef __clang__ -#pragma GCC pop_options -#endif