Bug 1480550 - add ctypes support for aarch64 windows; r=dmajor

The bulk of this patch is the new win64.asm, which is a more-or-less
direct copy of aarch64's sysv.S file, with modifications for armasm64's
peculiarities.  The changes to ffi.c were minimal, mostly so that
arithmetic on `void*` conforms to the C standard.
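
For illustration (not part of the patch): ISO C forbids arithmetic on
void*, which GCC permits as an extension by treating it as char*; MSVC
for aarch64 rejects it, hence the (char*) casts in the ffi.c hunks
below.  A minimal before/after sketch:

    /* GNU extension, rejected by MSVC: */
    void *next (void *p, size_t n) { return p + n; }

    /* Conforming C, the spelling used throughout the ffi.c changes: */
    void *next (void *p, size_t n) { return (char *) p + n; }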
Nathan Froyd 2018-10-26 13:00:41 -04:00
Parent a62c29cd3e
Commit 9d90fe9902
5 changed files: 312 additions and 11 deletions

config/external/ffi/moz.build (vendored)

@@ -77,6 +77,8 @@ else:
             ASFLAGS += ['-no-integrated-as']
     elif CONFIG['FFI_TARGET'] == 'AARCH64':
         ffi_srcs = ('sysv.S', 'ffi.c')
+    elif CONFIG['FFI_TARGET'] == 'ARM64_WIN64':
+        ffi_srcs = ('win64.asm', 'ffi.c')
     elif CONFIG['FFI_TARGET'] == 'X86':
         ffi_srcs = ('ffi.c', 'sysv.S', 'win32.S')
     elif CONFIG['FFI_TARGET'] == 'X86_64':


@@ -33,11 +33,11 @@ def ffi_target(target):
                 'Use --with-system-ffi instead.')
     if target.os == 'WINNT':
-        target_dir = 'x86'
-        if target.cpu == 'x86_64':
-            target_name = 'X86_WIN64'
-        else:
-            target_name = 'X86_WIN32'
+        target_dir, target_name = {
+            'x86_64': ('x86', 'X86_WIN64'),
+            'x86': ('x86', 'X86_WIN32'),
+            'aarch64': ('aarch64', 'ARM64_WIN64'),
+        }[target.cpu]
     elif target.os == 'OSX':
         target_dir = 'x86'
         target_name = 'X86_DARWIN'


@@ -6808,7 +6808,13 @@ GetABI(JSContext* cx, HandleValue abiType, ffi_abi* result)
     return true;
   case ABI_THISCALL:
 #if defined(_WIN64)
+#if defined(_M_X64)
     *result = FFI_WIN64;
+#elif defined(_M_ARM64)
+    *result = FFI_SYSV;
+#else
+#error unknown 64-bit Windows platform
+#endif
     return true;
 #elif defined(_WIN32)
     *result = FFI_THISCALL;
@@ -6824,7 +6830,13 @@ GetABI(JSContext* cx, HandleValue abiType, ffi_abi* result)
 #elif (defined(_WIN64))
     // We'd like the same code to work across Win32 and Win64, so stdcall_api
     // and winapi_abi become aliases to the lone Win64 ABI.
+#if defined(_M_X64)
     *result = FFI_WIN64;
+#elif defined(_M_ARM64)
+    *result = FFI_SYSV;
+#else
+#error unknown 64-bit Windows platform
+#endif
     return true;
 #endif
   case INVALID_ABI:


@@ -26,6 +26,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
 #include <stdlib.h>
 
+#if defined(_WIN32)
+#if !defined(WIN32_LEAN_AND_MEAN)
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#endif
+
 /* Stack alignment requirement in bytes */
 #if defined (__APPLE__)
 #define AARCH64_STACK_ALIGN 1
@@ -65,6 +72,9 @@ ffi_clear_cache (void *start, void *end)
   sys_icache_invalidate (start, (char *)end - (char *)start);
 #elif defined (__GNUC__)
   __builtin___clear_cache (start, end);
+#elif defined (_WIN32)
+  FlushInstructionCache (GetCurrentProcess (), start,
+                         (char*)end - (char*)start);
 #else
 #error "Missing builtin to flush instruction cache"
 #endif
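
As a sketch of the Win32 idiom this new branch relies on (the buffer
handling here is illustrative, not libffi's): instructions written
through the data side must be flushed before they are executed.

    #define WIN32_LEAN_AND_MEAN
    #include <windows.h>
    #include <string.h>

    /* Copy code bytes into executable memory and make them visible
       to the instruction fetch path. */
    static void *
    emit_code (const void *code, size_t len)
    {
      void *buf = VirtualAlloc (NULL, len, MEM_COMMIT | MEM_RESERVE,
                                PAGE_EXECUTE_READWRITE);
      if (buf == NULL)
        return NULL;
      memcpy (buf, code, len);
      /* The same call ffi_clear_cache now makes under _WIN32. */
      FlushInstructionCache (GetCurrentProcess (), buf, len);
      return buf;
    }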
@@ -219,6 +229,10 @@ get_basic_type_size (unsigned short type)
     }
 }
 
+// XXX The Win64 and the SYSV ABI are very close, differing only in their
+// calling of varargs functions.  Since we don't care about calling varargs
+// functions in our use of libffi, we just hack our way through and use the
+// SYSV-designated functions everywhere.
 extern void
 ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
                             extended_cif *),
@@ -491,7 +505,7 @@ allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
   state->nsaa = ALIGN (state->nsaa, 8);
 #endif
 
-  allocation = stack + state->nsaa;
+  allocation = (char*)stack + state->nsaa;
   state->nsaa += size;
 
   return allocation;
@@ -575,7 +589,7 @@ copy_hfa_to_reg_or_stack (void *memory,
        {
          void *reg = allocate_to_v (context, state);
          copy_basic_type (reg, memory, type);
-         memory += get_basic_type_size (type);
+         memory = (char*)memory + get_basic_type_size (type);
        }
     }
 }
@@ -859,7 +873,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
        {
          void *reg = get_basic_type_addr (type, &context, j);
          copy_basic_type (rvalue, reg, type);
-         rvalue += get_basic_type_size (type);
+         rvalue = (char*)rvalue + get_basic_type_size (type);
        }
     }
   else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
@@ -902,7 +916,8 @@ static unsigned char trampoline [] =
 
 /* Build a trampoline. */
 #define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)                        \
-  ({unsigned char *__tramp = (unsigned char*)(TRAMP);                   \
+  do {                                                                  \
+    unsigned char *__tramp = (unsigned char*)(TRAMP);                   \
     UINT64 __fun = (UINT64)(FUN);                                       \
     UINT64 __ctx = (UINT64)(CTX);                                       \
     UINT64 __flags = (UINT64)(FLAGS);                                   \
@@ -911,7 +926,7 @@ static unsigned char trampoline [] =
     memcpy (__tramp + 20, &__ctx, sizeof (__ctx));                      \
     memcpy (__tramp + 28, &__flags, sizeof (__flags));                  \
     ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);            \
-  })
+  } while(0)
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
@@ -1141,7 +1156,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
        {
          void *reg = get_basic_type_addr (type, context, j);
          copy_basic_type (reg, rvalue, type);
-         rvalue += get_basic_type_size (type);
+         rvalue = (char*)rvalue + get_basic_type_size (type);
        }
     }
   else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
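
The FFI_INIT_TRAMPOLINE change above swaps a GCC statement expression,
({ ... }), for the portable do { ... } while(0) form: MSVC, the
compiler paired with armasm64, does not implement statement
expressions, and the macro is only ever used as a statement, never for
its value.  A toy example of the same transformation, with
hypothetical names:

    /* GCC-only statement expression: */
    #define INIT_GNU(p)       ({ unsigned char *q = (p); q[0] = 0; })

    /* Portable equivalent, still usable as a single statement: */
    #define INIT_PORTABLE(p)     \
      do {                       \
        unsigned char *q = (p);  \
        q[0] = 0;                \
      } while (0)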


@@ -0,0 +1,272 @@
;; Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
;; Permission is hereby granted, free of charge, to any person obtaining
;; a copy of this software and associated documentation files (the
;; ``Software''), to deal in the Software without restriction, including
;; without limitation the rights to use, copy, modify, merge, publish,
;; distribute, sublicense, and/or sell copies of the Software, and to
;; permit persons to whom the Software is furnished to do so, subject to
;; the following conditions:
;; The above copyright notice and this permission notice shall be
;; included in all copies or substantial portions of the Software.
;; THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
;; Hand-converted from the sysv.S file in this directory.
AREA |.text|, CODE, ARM64
;; ffi_call_SYSV()
;; Create a stack frame, setup an argument context, call the callee
;; and extract the result.
;; The maximum required argument stack size is provided,
;; ffi_call_SYSV() allocates that stack space then calls the
;; prepare_fn to populate register context and stack. The
;; argument passing registers are loaded from the register
;; context and the callee is called; on return the register-passing
;; registers are saved back to the context.  Our caller will
;; extract the return value from the final state of the saved
;; register context.
;; Prototype:
;; extern unsigned
;; ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
;; extended_cif *),
;; struct call_context *context,
;; extended_cif *,
;; size_t required_stack_size,
;; void (*fn)(void));
;; Therefore on entry we have:
;; x0 prepare_fn
;; x1 &context
;; x2 &ecif
;; x3 bytes
;; x4 fn
;; This function uses the following stack frame layout:
;; ==
;;                 saved x30(lr)
;; x29(fp)->       saved x29(fp)
;;                 saved x24
;;                 saved x23
;;                 saved x22
;; sp'    ->       saved x21
;;                 ...
;; sp     ->       (constructed callee stack arguments)
;; ==
;; Voila!
EXPORT |ffi_call_SYSV|
|ffi_call_SYSV| PROC
;#define ffi_call_SYSV_FS (8 * 4)
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #32 ; ffi_call_SYSV_FS
stp x21, x22, [sp, #0]
stp x23, x24, [sp, #16]
mov x21, x1
mov x22, x2
mov x24, x4
; Allocate the stack space for the actual arguments, many
; arguments will be passed in registers, but we assume
; worst case and allocate sufficient stack for ALL of
; the arguments.
sub sp, sp, x3
; unsigned (*prepare_fn) (struct call_context *context,
; unsigned char *stack, extended_cif *ecif);
mov x23, x0
mov x0, x1
mov x1, sp
; x2 already in place
blr x23
; Preserve the flags returned.
mov x23, x0
; Figure out if we should touch the vector registers.
tbz x23, #0, noload_call
; Load the vector argument passing registers.
ldp q0, q1, [x21, #8*32 + 0]
ldp q2, q3, [x21, #8*32 + 32]
ldp q4, q5, [x21, #8*32 + 64]
ldp q6, q7, [x21, #8*32 + 96]
noload_call
; Load the core argument passing registers.
ldp x0, x1, [x21, #0]
ldp x2, x3, [x21, #16]
ldp x4, x5, [x21, #32]
ldp x6, x7, [x21, #48]
; Don't forget x8 which may be holding the address of a return buffer.
ldr x8, [x21, #8*8]
blr x24
; Save the core argument passing registers.
stp x0, x1, [x21, #0]
stp x2, x3, [x21, #16]
stp x4, x5, [x21, #32]
stp x6, x7, [x21, #48]
; Note nothing useful ever comes back in x8!
; Figure out if we should touch the vector registers.
tbz x23, #0, nosave_call ; AARCH64_FFI_WITH_V_BIT
; Save the vector argument passing registers.
stp q0, q1, [x21, #8*32 + 0]
stp q2, q3, [x21, #8*32 + 32]
stp q4, q5, [x21, #8*32 + 64]
stp q6, q7, [x21, #8*32 + 96]
nosave_call
; All done, unwind our stack frame.
ldp x21, x22, [x29, #-32]      ; ffi_call_SYSV_FS
ldp x23, x24, [x29, #-32+16]   ; ffi_call_SYSV_FS
mov sp, x29
ldp x29, x30, [sp], #16
ret
ENDP
; #define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
;; ffi_closure_SYSV
;; Closure invocation glue. This is the low level code invoked directly by
;; the closure trampoline to setup and call a closure.
;; On entry x17 points to a struct trampoline_data, x16 has been clobbered,
;; and all other registers are preserved.
;; We allocate a call context and save the argument passing registers,
;; then invoke the generic C ffi_closure_SYSV_inner() function to do all
;; the real work, on return we load the result passing registers back from
;; the call context.
;; On entry
;; extern void
;; ffi_closure_SYSV (struct trampoline_data *);
;; struct trampoline_data
;; {
;; UINT64 *ffi_closure;
;; UINT64 flags;
;; };
;; This function uses the following stack frame layout:
;; ==
;;                 saved x30(lr)
;; x29(fp)->       saved x29(fp)
;;                 saved x22
;;                 saved x21
;;                 ...
;; sp     ->       call_context
;; ==
;; Voila!
IMPORT |ffi_closure_SYSV_inner|
EXPORT |ffi_closure_SYSV|
|ffi_closure_SYSV| PROC
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #256+512+16
stp x21, x22, [x29, #-16]
; Load x21 with &call_context.
mov x21, sp
; Preserve our struct trampoline_data
mov x22, x17
; Save the rest of the argument passing registers.
stp x0, x1, [x21, #0]
stp x2, x3, [x21, #16]
stp x4, x5, [x21, #32]
stp x6, x7, [x21, #48]
; Don't forget we may have been given a result scratch pad address.
str x8, [x21, #64]
; Figure out if we should touch the vector registers.
ldr x0, [x22, #8]
tbz x0, #0, nosave_closure ; AARCH64_FFI_WITH_V_BIT
; Save the argument passing vector registers.
stp q0, q1, [x21, #8*32 + 0]
stp q2, q3, [x21, #8*32 + 32]
stp q4, q5, [x21, #8*32 + 64]
stp q6, q7, [x21, #8*32 + 96]
nosave_closure
; Load &ffi_closure.
ldr x0, [x22, #0]
mov x1, x21
; Compute the location of the stack at the point that the
; trampoline was called.
add x2, x29, #16
bl ffi_closure_SYSV_inner
; Figure out if we should touch the vector registers.
ldr x0, [x22, #8]
tbz x0, #0, noload_closure ; AARCH64_FFI_WITH_V_BIT
; Load the result passing vector registers.
ldp q0, q1, [x21, #8*32 + 0]
ldp q2, q3, [x21, #8*32 + 32]
ldp q4, q5, [x21, #8*32 + 64]
ldp q6, q7, [x21, #8*32 + 96]
noload_closure
; Load the result passing core registers.
ldp x0, x1, [x21, #0]
ldp x2, x3, [x21, #16]
ldp x4, x5, [x21, #32]
ldp x6, x7, [x21, #48]
; Note nothing useful is returned in x8.
; We are done, unwind our frame.
ldp x21, x22, [x29, #-16]
mov sp, x29
ldp x29, x30, [sp], #16
ret
ENDP
END
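
As a closing illustration, a minimal caller of the kind this port
enables on aarch64 Windows, following the classic libffi README
example (the example itself is not part of the patch):

    #include <stdio.h>
    #include <ffi.h>

    int
    main (void)
    {
      ffi_cif cif;
      ffi_type *args[1] = { &ffi_type_pointer };
      char *s = "Hello from ffi_call";
      void *values[1] = { &s };
      ffi_arg rc;

      /* On this target FFI_DEFAULT_ABI is the SYSV-flavored ABI that
         the GetABI changes above select under _M_ARM64. */
      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                        &ffi_type_sint, args) == FFI_OK)
        ffi_call (&cif, FFI_FN (puts), &rc, values);
      return 0;
    }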