зеркало из https://github.com/mozilla/gecko-dev.git
bug 1272498 - rewrite NS_InvokeByIndex in assembly r=froydnj
This commit is contained in:
Родитель
f3849011fc
Коммит
17486e3454
|
@ -14,6 +14,10 @@ if CONFIG['OS_ARCH'] == 'Darwin':
|
|||
'!xptcstubs_asm_ppc_darwin.s',
|
||||
'xptcinvoke_asm_ppc_rhapsody.s',
|
||||
]
|
||||
if CONFIG['OS_TEST'] == 'x86_64':
|
||||
SOURCES += [
|
||||
'xptcinvoke_asm_x86_64_unix.S',
|
||||
]
|
||||
if '86' in CONFIG['OS_TEST'] and CONFIG['OS_TEST'] != 'x86_64':
|
||||
DEFINES['MOZ_NEED_LEADING_UNDERSCORE'] = True
|
||||
|
||||
|
@ -28,6 +32,7 @@ if CONFIG['OS_ARCH'] in ('Linux', 'Bitrig', 'DragonFly', 'FreeBSD', 'NetBSD', 'O
|
|||
CONFIG['OS_ARCH'].startswith('GNU_'):
|
||||
if CONFIG['OS_TEST'] == 'x86_64':
|
||||
SOURCES += [
|
||||
'xptcinvoke_asm_x86_64_unix.S',
|
||||
'xptcinvoke_x86_64_unix.cpp',
|
||||
'xptcstubs_x86_64_linux.cpp',
|
||||
]
|
||||
|
@ -53,6 +58,7 @@ if CONFIG['OS_ARCH'] == 'SunOS' and '86' in CONFIG['OS_TEST']:
|
|||
if CONFIG['OS_TEST'] == 'x86_64':
|
||||
if CONFIG['GNU_CC']:
|
||||
SOURCES += [
|
||||
'xptcinvoke_asm_x86_64_unix.S',
|
||||
'xptcinvoke_x86_64_unix.cpp',
|
||||
'xptcstubs_x86_64_linux.cpp'
|
||||
]
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Darwin gives a leading '_' to symbols defined in C code.
|
||||
#ifdef XP_DARWIN
|
||||
#define SYM(x) _ ## x
|
||||
#else
|
||||
#define SYM(x) x
|
||||
#endif
|
||||
|
||||
.intel_syntax noprefix
|
||||
|
||||
# nsresult NS_InvokeByIndex(nsISupports* this, uint32_t aVtableIndex,
|
||||
# uint32_t argc, nsXPTCVariant* argv);
|
||||
.text
|
||||
.global SYM(NS_InvokeByIndex)
|
||||
#ifndef XP_DARWIN
|
||||
.type NS_InvokeByIndex, @function
|
||||
#endif
|
||||
.align 4
|
||||
SYM(NS_InvokeByIndex):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
# save r12 and r13 because we use them and they are callee saved.
|
||||
push r12
|
||||
push r13
|
||||
|
||||
# save this and the vtable index because we need them after setting up the
|
||||
# stack.
|
||||
mov r12, rdi
|
||||
mov r13, rsi
|
||||
|
||||
# allocate space for stack arguments, in theory we only need 8 * (argc - 5)
|
||||
# bytes because at least 5 arguments will go in registers, but for now it is
|
||||
# just simpler to allocate 8 * argc bytes. Note that we treat the this
|
||||
# pointer specially.
|
||||
lea eax, [edx * 8]
|
||||
sub rsp, rax
|
||||
|
||||
# If there is an odd number of args the stack can be misaligned so realign it.
|
||||
and rsp, 0xfffffffffffffff0
|
||||
|
||||
# pass the stack slot area to InvokeCopyToStack.
|
||||
mov r8, rsp
|
||||
|
||||
# setup space for the register slots: there are 5 integer ones and 8 floating
|
||||
# point ones. So we need 104 bytes of space, but we allocate 112 to keep rsp
|
||||
# aligned to 16 bytes.
|
||||
sub rsp, 112
|
||||
|
||||
# the first argument to InvokeCopyToStack is the integer register area, and the
|
||||
# second is the floating point area.
|
||||
mov rdi, rsp
|
||||
lea rsi, [rsp + 40]
|
||||
|
||||
# The 3rd and 4th arguments to InvokeCopyToStack are already in the right
|
||||
# registers. So now we can just call InvokeCopyToStack.
|
||||
call SYM(InvokeCopyToStack)
|
||||
|
||||
# setup this
|
||||
mov rdi, r12
|
||||
|
||||
# copy the integer arguments into place.
|
||||
mov rsi, [rsp]
|
||||
mov rdx, [rsp + 8]
|
||||
mov rcx, [rsp + 16]
|
||||
mov r8, [rsp + 24]
|
||||
mov r9, [rsp + 32]
|
||||
|
||||
# copy the float arguments into place
|
||||
movsd xmm0, [rsp + 40]
|
||||
movsd xmm1, [rsp + 48]
|
||||
movsd xmm2, [rsp + 56]
|
||||
movsd xmm3, [rsp + 64]
|
||||
movsd xmm4, [rsp + 72]
|
||||
movsd xmm5, [rsp + 80]
|
||||
movsd xmm6, [rsp + 88]
|
||||
movsd xmm7, [rsp + 96]
|
||||
|
||||
# get rid of the scratch space for registers
|
||||
add rsp, 112
|
||||
|
||||
# load the function pointer and call
|
||||
lea eax, [r13d * 8]
|
||||
add rax, [rdi]
|
||||
call [rax]
|
||||
|
||||
# r12 and r13 were pushed relative to the old stack pointer which is now the
|
||||
# frame pointer.
|
||||
mov r12, [rbp - 0x8]
|
||||
mov r13, [rbp - 0x10]
|
||||
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
#ifndef XP_DARWIN
|
||||
// Magic indicating no need for an executable stack
|
||||
.section .note.GNU-stack, "", @progbits ; .previous
|
||||
#endif
|
|
@ -8,46 +8,19 @@
|
|||
|
||||
#include "xptcprivate.h"
|
||||
|
||||
// 6 integral parameters are passed in registers
|
||||
const uint32_t GPR_COUNT = 6;
|
||||
// 6 integral parameters are passed in registers, but 1 is |this| which isn't
|
||||
// considered here.
|
||||
const uint32_t GPR_COUNT = 5;
|
||||
|
||||
// 8 floating point parameters are passed in SSE registers
|
||||
const uint32_t FPR_COUNT = 8;
|
||||
|
||||
// Remember that these 'words' are 64-bit long
|
||||
static inline void
|
||||
invoke_count_words(uint32_t paramCount, nsXPTCVariant * s,
|
||||
uint32_t & nr_stack)
|
||||
extern "C" void
|
||||
InvokeCopyToStack(uint64_t * gpregs, double * fpregs,
|
||||
uint32_t paramCount, nsXPTCVariant * s,
|
||||
uint64_t* d)
|
||||
{
|
||||
uint32_t nr_gpr;
|
||||
uint32_t nr_fpr;
|
||||
nr_gpr = 1; // skip one GP register for 'that'
|
||||
nr_fpr = 0;
|
||||
nr_stack = 0;
|
||||
|
||||
/* Compute number of eightbytes of class MEMORY. */
|
||||
for (uint32_t i = 0; i < paramCount; i++, s++) {
|
||||
if (!s->IsPtrData()
|
||||
&& (s->type == nsXPTType::T_FLOAT || s->type == nsXPTType::T_DOUBLE)) {
|
||||
if (nr_fpr < FPR_COUNT)
|
||||
nr_fpr++;
|
||||
else
|
||||
nr_stack++;
|
||||
}
|
||||
else {
|
||||
if (nr_gpr < GPR_COUNT)
|
||||
nr_gpr++;
|
||||
else
|
||||
nr_stack++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s,
|
||||
uint64_t * gpregs, double * fpregs)
|
||||
{
|
||||
uint32_t nr_gpr = 1u; // skip one GP register for 'that'
|
||||
uint32_t nr_gpr = 0u; // skip one GP register for 'that'
|
||||
uint32_t nr_fpr = 0u;
|
||||
uint64_t value = 0u;
|
||||
|
||||
|
@ -100,89 +73,3 @@ invoke_copy_to_stack(uint64_t * d, uint32_t paramCount, nsXPTCVariant * s,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disable avx for the next function to allow compilation with
|
||||
// -march=native on new machines, or similar hardcoded -march options.
|
||||
// Having avx enabled appears to change the alignment behavior of alloca
|
||||
// (apparently adding an extra 16 bytes) of padding/alignment (and using
|
||||
// 32-byte alignment instead of 16-byte). This seems to be the best
|
||||
// available workaround, given that this code, which should perhaps
|
||||
// better be written in assembly, is written in C++.
|
||||
#ifndef __clang__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("no-avx")
|
||||
#endif
|
||||
|
||||
// Avoid AddressSanitizer instrumentation for the next function because it
|
||||
// depends on __builtin_alloca behavior and alignment that cannot be relied on
|
||||
// once the function is compiled with a version of ASan that has dynamic-alloca
|
||||
// instrumentation enabled.
|
||||
|
||||
MOZ_ASAN_BLACKLIST
|
||||
EXPORT_XPCOM_API(nsresult)
|
||||
NS_InvokeByIndex(nsISupports * that, uint32_t methodIndex,
|
||||
uint32_t paramCount, nsXPTCVariant * params)
|
||||
{
|
||||
uint32_t nr_stack;
|
||||
invoke_count_words(paramCount, params, nr_stack);
|
||||
|
||||
// Stack, if used, must be 16-bytes aligned
|
||||
if (nr_stack)
|
||||
nr_stack = (nr_stack + 1) & ~1;
|
||||
|
||||
// Load parameters to stack, if necessary
|
||||
uint64_t *stack = (uint64_t *) __builtin_alloca(nr_stack * 8);
|
||||
uint64_t gpregs[GPR_COUNT];
|
||||
double fpregs[FPR_COUNT];
|
||||
invoke_copy_to_stack(stack, paramCount, params, gpregs, fpregs);
|
||||
|
||||
// We used to have switches to make sure we would only load the registers
|
||||
// that are needed for this call. That produced larger code that was
|
||||
// not faster in practice. It also caused compiler warnings about the
|
||||
// variables being used uninitialized.
|
||||
// We now just load every every register. There could still be a warning
|
||||
// from a memory analysis tools that we are loading uninitialized stack
|
||||
// positions.
|
||||
|
||||
// FIXME: this function depends on the above __builtin_alloca placing
|
||||
// the array in the correct spot for the ABI.
|
||||
|
||||
// Load FPR registers from fpregs[]
|
||||
double d0, d1, d2, d3, d4, d5, d6, d7;
|
||||
|
||||
d7 = fpregs[7];
|
||||
d6 = fpregs[6];
|
||||
d5 = fpregs[5];
|
||||
d4 = fpregs[4];
|
||||
d3 = fpregs[3];
|
||||
d2 = fpregs[2];
|
||||
d1 = fpregs[1];
|
||||
d0 = fpregs[0];
|
||||
|
||||
// Load GPR registers from gpregs[]
|
||||
uint64_t a0, a1, a2, a3, a4, a5;
|
||||
|
||||
a5 = gpregs[5];
|
||||
a4 = gpregs[4];
|
||||
a3 = gpregs[3];
|
||||
a2 = gpregs[2];
|
||||
a1 = gpregs[1];
|
||||
a0 = (uint64_t) that;
|
||||
|
||||
// Get pointer to method
|
||||
uint64_t methodAddress = *((uint64_t *)that);
|
||||
methodAddress += 8 * methodIndex;
|
||||
methodAddress = *((uint64_t *)methodAddress);
|
||||
|
||||
typedef nsresult (*Method)(uint64_t, uint64_t, uint64_t, uint64_t,
|
||||
uint64_t, uint64_t, double, double, double,
|
||||
double, double, double, double, double);
|
||||
nsresult result = ((Method)methodAddress)(a0, a1, a2, a3, a4, a5,
|
||||
d0, d1, d2, d3, d4, d5,
|
||||
d6, d7);
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifndef __clang__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче