Fix and improve coroutines for Darwin (macOS) ppc/ppc64. (#5975)

This commit is contained in:
Sergey Fedorov 2022-10-19 18:49:45 +08:00 коммит произвёл GitHub
Родитель fc3137ef54
Коммит 567725ed30
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
17 изменённых файлов: 196 добавлений и 120 удалений

Просмотреть файл

@ -869,8 +869,11 @@ typedef struct {
int type;
} DebugInfoValue;
/*
 * MERGE_2INTS(a, b, sz): combine two halves into a single uint64_t.
 *
 * On big-endian hosts (WORDS_BIGENDIAN defined) `a` is shifted left by
 * `sz` bits and `b` fills the low bits; on little-endian hosts the roles
 * are swapped, so `b` is shifted and `a` fills the low bits.
 *
 * Every parameter, including `sz`, is parenthesized so that arguments
 * containing low-precedence operators (e.g. `cond ? 32 : 16`) expand
 * correctly. Each argument is evaluated exactly once.
 *
 * NOTE(review): the unshifted half is OR'd in without widening or masking,
 * so callers presumably pass unsigned values no wider than `sz` bits -- confirm.
 */
#if defined(WORDS_BIGENDIAN)
#define MERGE_2INTS(a,b,sz) (((uint64_t)(a)<<(sz))|(b))
#else
#define MERGE_2INTS(a,b,sz) (((uint64_t)(b)<<(sz))|(a))
#endif
static uint16_t
get_uint16(const uint8_t *p)

Просмотреть файл

@ -1902,8 +1902,8 @@ AS_CASE(["${target_cpu}-${target_os}:${target_archs}"],
[universal-darwin*:*ppc*], [
AC_LIBSOURCES(alloca.c)
AC_SUBST([ALLOCA], [\${LIBOBJDIR}alloca.${ac_objext}])
RUBY_DEFINE_IF([defined __powerpc__], C_ALLOCA, 1)
RUBY_DEFINE_IF([defined __powerpc__], alloca, alloca)
RUBY_DEFINE_IF([defined __POWERPC__], C_ALLOCA, 1) # Darwin defines __POWERPC__ for ppc and ppc64 both
RUBY_DEFINE_IF([defined __POWERPC__], alloca, alloca)
],
[
AC_FUNC_ALLOCA
@ -2573,10 +2573,13 @@ AS_CASE([$coroutine_type], [yes|''], [
[arm64-darwin*], [
coroutine_type=arm64
],
[powerpc-darwin*], [
# Correct target name is powerpc*-, but Ruby seems to prefer ppc*-.
# Notice that Darwin PPC ABI differs from AIX and ELF.
# Adding PPC targets for AIX, *BSD and *Linux will require separate implementations.
[powerpc-darwin*|ppc-darwin*], [
coroutine_type=ppc
],
[powerpc64-darwin*], [
[powerpc64-darwin*|ppc64-darwin*], [
coroutine_type=ppc64
],
[x*64-linux*], [

Просмотреть файл

@ -1,73 +1,90 @@
; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022.
; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine.
; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S
; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S
; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only
; ppc32 version may be re-written compactly with stmw/lmw, but the code won't be faster, see: https://github.com/ruby/ruby/pull/5927#issuecomment-1139730541
; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF.
; To add support for AIX, *BSD or *Linux, please make separate implementations.
#define TOKEN_PASTE(x,y) x##y
#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name)
.machine ppc7400 ; = G4, Rosetta
.text
.align 2
.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer)
.align 2
PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer):
# Make space on the stack for caller registers
addi r1,r1,-80
; Make space on the stack for caller registers
; (Should we rather use red zone? See libphobos example.)
subi r1,r1,80
# Save caller registers
stw r13,0(r1)
stw r14,4(r1)
stw r15,8(r1)
stw r16,12(r1)
stw r17,16(r1)
stw r18,20(r1)
stw r19,24(r1)
stw r20,28(r1)
stw r21,32(r1)
stw r22,36(r1)
stw r23,40(r1)
stw r24,44(r1)
stw r25,48(r1)
stw r26,52(r1)
stw r27,56(r1)
stw r28,60(r1)
stw r29,64(r1)
stw r30,68(r1)
stw r31,72(r1)
# Save return address
; Get LR
mflr r0
; Save caller registers
stw r31,0(r1)
stw r30,4(r1)
stw r29,8(r1)
stw r28,12(r1)
stw r27,16(r1)
stw r26,20(r1)
stw r25,24(r1)
stw r24,28(r1)
stw r23,32(r1)
stw r22,36(r1)
stw r21,40(r1)
stw r20,44(r1)
stw r19,48(r1)
stw r18,52(r1)
stw r17,56(r1)
stw r16,60(r1)
stw r15,64(r1)
stw r14,68(r1)
stw r13,72(r1)
; Save return address
; Possibly should rather be saved into linkage area, see libphobos and IBM docs
stw r0,76(r1)
# Save stack pointer to first argument
; Save stack pointer to first argument
stw r1,0(r3)
# Load stack pointer from second argument
; Load stack pointer from second argument
lwz r1,0(r4)
# Restore caller registers
lwz r13,0(r1)
lwz r14,4(r1)
lwz r15,8(r1)
lwz r16,12(r1)
lwz r17,16(r1)
lwz r18,20(r1)
lwz r19,24(r1)
lwz r20,28(r1)
lwz r21,32(r1)
lwz r22,36(r1)
lwz r23,40(r1)
lwz r24,44(r1)
lwz r25,48(r1)
lwz r26,52(r1)
lwz r27,56(r1)
lwz r28,60(r1)
lwz r29,64(r1)
lwz r30,68(r1)
lwz r31,72(r1)
# Load return address
; Load return address
lwz r0,76(r1)
; Restore caller registers
lwz r13,72(r1)
lwz r14,68(r1)
lwz r15,64(r1)
lwz r16,60(r1)
lwz r17,56(r1)
lwz r18,52(r1)
lwz r19,48(r1)
lwz r20,44(r1)
lwz r21,40(r1)
lwz r22,36(r1)
lwz r23,32(r1)
lwz r24,28(r1)
lwz r25,24(r1)
lwz r26,20(r1)
lwz r27,16(r1)
lwz r28,12(r1)
lwz r29,8(r1)
lwz r30,4(r1)
lwz r31,0(r1)
; Set LR
mtlr r0
# Pop stack frame
; Pop stack frame
addi r1,r1,80
# Jump to return address
; Jump to return address
blr

Просмотреть файл

@ -9,6 +9,7 @@
#include <string.h>
#define COROUTINE __attribute__((noreturn)) void
#define COROUTINE_LIMITED_ADDRESS_SPACE
enum {
COROUTINE_REGISTERS =

Просмотреть файл

@ -1,70 +1,89 @@
; Based on the code by Samuel Williams. Created by Sergey Fedorov on 04/06/2022.
; Credits to Samuel Williams, Rei Odaira and Iain Sandoe. Errors, if any, are mine.
; Some relevant examples: https://github.com/gcc-mirror/gcc/blob/master/libphobos/libdruntime/config/powerpc/switchcontext.S
; https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/rs6000/darwin-gpsave.S
; https://www.ibm.com/docs/en/aix/7.2?topic=epilogs-saving-gprs-only
; Notice that this code is only for Darwin (macOS). Darwin ABI differs from AIX and ELF.
; To add support for AIX, *BSD or *Linux, please make separate implementations.
#define TOKEN_PASTE(x,y) x##y
#define PREFIXED_SYMBOL(prefix,name) TOKEN_PASTE(prefix,name)
.machine ppc64 ; = G5
.text
.align 3
.globl PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer)
.align 2
PREFIXED_SYMBOL(SYMBOL_PREFIX,coroutine_transfer):
# Make space on the stack for caller registers
addi r1,r1,-152
; Make space on the stack for caller registers
; (Should we rather use red zone? See libphobos example.)
subi r1,r1,160
# Save caller registers
std r14,0(r1)
std r15,8(r1)
std r16,16(r1)
std r17,24(r1)
std r18,32(r1)
std r19,40(r1)
std r20,48(r1)
std r21,56(r1)
std r22,64(r1)
std r23,72(r1)
std r24,80(r1)
std r25,88(r1)
std r26,96(r1)
std r27,104(r1)
std r28,112(r1)
std r29,120(r1)
std r30,128(r1)
std r31,136(r1)
# Save return address
; Get LR
mflr r0
std r0,144(r1)
# Save stack pointer to first argument
; Save caller registers
std r31,0(r1)
std r30,8(r1)
std r29,16(r1)
std r28,24(r1)
std r27,32(r1)
std r26,40(r1)
std r25,48(r1)
std r24,56(r1)
std r23,64(r1)
std r22,72(r1)
std r21,80(r1)
std r20,88(r1)
std r19,96(r1)
std r18,104(r1)
std r17,112(r1)
std r16,120(r1)
std r15,128(r1)
std r14,136(r1)
std r13,144(r1)
; Save return address
; Possibly should rather be saved into linkage area, see libphobos and IBM docs
std r0,152(r1)
; Save stack pointer to first argument
std r1,0(r3)
# Load stack pointer from second argument
; Load stack pointer from second argument
ld r1,0(r4)
# Restore caller registers
ld r14,0(r1)
ld r15,8(r1)
ld r16,16(r1)
ld r17,24(r1)
ld r18,32(r1)
ld r19,40(r1)
ld r20,48(r1)
ld r21,56(r1)
ld r22,64(r1)
ld r23,72(r1)
ld r24,80(r1)
ld r25,88(r1)
ld r26,96(r1)
ld r27,104(r1)
ld r28,112(r1)
ld r29,120(r1)
ld r30,128(r1)
ld r31,136(r1)
; Load return address
ld r0,152(r1)
# Load return address
ld r0,144(r1)
; Restore caller registers
ld r13,144(r1)
ld r14,136(r1)
ld r15,128(r1)
ld r16,120(r1)
ld r17,112(r1)
ld r18,104(r1)
ld r19,96(r1)
ld r20,88(r1)
ld r21,80(r1)
ld r22,72(r1)
ld r23,64(r1)
ld r24,56(r1)
ld r25,48(r1)
ld r26,40(r1)
ld r27,32(r1)
ld r28,24(r1)
ld r29,16(r1)
ld r30,8(r1)
ld r31,0(r1)
; Set LR
mtlr r0
# Pop stack frame
addi r1,r1,152
; Pop stack frame
addi r1,r1,160
# Jump to return address
; Jump to return address
blr

Просмотреть файл

@ -12,7 +12,7 @@
enum {
COROUTINE_REGISTERS =
19 /* 18 general purpose registers (r14–r31) and 1 return address */
20 /* 19 general purpose registers (r13–r31) and 1 return address */
+ 4 /* space for fiber_entry() to store the link register */
};
@ -44,7 +44,7 @@ static inline void coroutine_initialize(
memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
/* Skip a global prologue that sets the TOC register */
context->stack_pointer[18] = ((char*)start) + 8;
context->stack_pointer[19] = ((char*)start) + 8;
}
struct coroutine_context * coroutine_transfer(struct coroutine_context * current, struct coroutine_context * target);

6
dln.c
Просмотреть файл

@ -41,6 +41,10 @@ static void dln_loaderror(const char *format, ...);
# include <strings.h>
#endif
#if defined __APPLE__
# include <AvailabilityMacros.h>
#endif
#ifndef xmalloc
void *xmalloc();
void *xcalloc();
@ -58,7 +62,7 @@ void *xrealloc();
#include <sys/stat.h>
#ifndef S_ISDIR
# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#endif
#ifdef HAVE_SYS_PARAM_H

Просмотреть файл

@ -668,7 +668,7 @@ bug_important_message(FILE *out, const char *const msg, size_t len)
#undef CRASH_REPORTER_MAY_BE_CREATED
#if defined(__APPLE__) && \
(!defined(MAC_OS_X_VERSION_10_6) || MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6)
(!defined(MAC_OS_X_VERSION_10_6) || MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6 || defined(__POWERPC__)) /* 10.6 PPC case */
# define CRASH_REPORTER_MAY_BE_CREATED
#endif
static void

21
gc.c
Просмотреть файл

@ -1359,6 +1359,27 @@ tick(void)
return val;
}
/* Implementation for macOS PPC by @nobu
* See: https://github.com/ruby/ruby/pull/5975#discussion_r890045558
*/
#elif defined(__POWERPC__) && defined(__APPLE__)
typedef unsigned long long tick_t;
#define PRItick "llu"
/*
 * Read the PowerPC time base and return it as a 64-bit tick count.
 *
 * The value is assembled from two register reads (`mftbu` for the upper
 * half, `mftb` for the lower half).  The upper half is read again after
 * the lower half, and the whole sequence is retried until both upper reads
 * agree, so a carry out of the lower half between the reads cannot yield
 * a torn result.
 *
 * The asm statements are written inline rather than through function-local
 * `#define`s so that no helper macro names leak into the rest of the file.
 */
static __inline__ tick_t
tick(void)
{
    unsigned long int hi, lo, hi_check;

    do {
        __asm__ volatile("mftbu %0" : "=r"(hi));
        __asm__ volatile("mftb %0" : "=r"(lo));
        __asm__ volatile("mftbu %0" : "=r"(hi_check));
    } while (hi_check != hi); /* upper half changed mid-read: try again */

    return ((tick_t)hi << 32) | lo;
}
#elif defined(__aarch64__) && defined(__GNUC__)
typedef unsigned long tick_t;
#define PRItick "lu"

4
gc.h
Просмотреть файл

@ -6,10 +6,12 @@
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("movq\t%%rsp, %0" : "=r" (*(p)))
#elif defined(__i386) && defined(__GNUC__)
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("movl\t%%esp, %0" : "=r" (*(p)))
#elif (defined(__powerpc__) || defined(__powerpc64__)) && defined(__GNUC__) && !defined(_AIX)
#elif (defined(__powerpc__) || defined(__powerpc64__)) && defined(__GNUC__) && !defined(_AIX) && !defined(__APPLE__) // Not Apple is NEEDED to unbreak ppc64 build on Darwin. Don't ask.
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mr\t%0, %%r1" : "=r" (*(p)))
#elif (defined(__powerpc__) || defined(__powerpc64__)) && defined(__GNUC__) && defined(_AIX)
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mr %0,1" : "=r" (*(p)))
#elif defined(__POWERPC__) && defined(__APPLE__) // Darwin ppc and ppc64
#define SET_MACHINE_STACK_END(p) __asm__ volatile("mr %0, r1" : "=r" (*(p)))
#elif defined(__aarch64__) && defined(__GNUC__)
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mov\t%0, sp" : "=r" (*(p)))
#else

Просмотреть файл

@ -113,6 +113,8 @@
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__powerpc64__)
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__POWERPC__) // __POWERPC__ is defined for ppc and ppc64 on Darwin
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__aarch64__)
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__mc68020__)

Просмотреть файл

@ -49,10 +49,11 @@
# endif
#endif
/* __POWERPC__ added to accommodate Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# else

Просмотреть файл

@ -34,10 +34,11 @@
#error "Only strictly little or big endian supported"
#endif
/* __POWERPC__ added to accommodate Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# endif

3
st.c
Просмотреть файл

@ -1671,10 +1671,11 @@ st_values_check(st_table *tab, st_data_t *values, st_index_t size,
*/
#define FNV_32_PRIME 0x01000193
/* __POWERPC__ added to accommodate Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# endif

Просмотреть файл

@ -5,6 +5,7 @@ AS_CASE([$1],
[arm64], [],
[*64], [ARCH_FLAG=-m64],
[[i[3-6]86]], [ARCH_FLAG=-m32],
[ppc], [ARCH_FLAG=-m32],
[AC_MSG_ERROR(unknown target architecture: $target_archs)]
)
AC_MSG_RESULT([$ARCH_FLAG])

Просмотреть файл

@ -780,8 +780,8 @@ typedef struct rb_vm_struct {
#define RUBY_VM_FIBER_VM_STACK_SIZE ( 16 * 1024 * sizeof(VALUE)) /* 64 KB or 128 KB */
#define RUBY_VM_FIBER_VM_STACK_SIZE_MIN ( 2 * 1024 * sizeof(VALUE)) /* 8 KB or 16 KB */
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE ( 64 * 1024 * sizeof(VALUE)) /* 256 KB or 512 KB */
#if defined(__powerpc64__)
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN ( 32 * 1024 * sizeof(VALUE)) /* 128 KB or 256 KB */
#if defined(__powerpc64__) || defined(__ppc64__) // macOS has __ppc64__
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN ( 32 * 1024 * sizeof(VALUE)) /* 128 KB or 256 KB */
#else
#define RUBY_VM_FIBER_MACHINE_STACK_SIZE_MIN ( 16 * 1024 * sizeof(VALUE)) /* 64 KB or 128 KB */
#endif

Просмотреть файл

@ -55,7 +55,7 @@ static void vm_insns_counter_count_insn(int insn) {}
#elif defined(__GNUC__) && defined(__i386__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("e" reg)
#elif defined(__GNUC__) && defined(__powerpc64__)
#elif defined(__GNUC__) && (defined(__powerpc64__) || defined(__POWERPC__))
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg)
#elif defined(__GNUC__) && defined(__aarch64__)
@ -92,7 +92,7 @@ vm_exec_core(rb_execution_context_t *ec, VALUE initial)
DECL_SC_REG(rb_control_frame_t *, cfp, "15");
#define USE_MACHINE_REGS 1
#elif defined(__GNUC__) && defined(__powerpc64__)
#elif defined(__GNUC__) && (defined(__powerpc64__) || defined(__POWERPC__))
DECL_SC_REG(const VALUE *, pc, "14");
DECL_SC_REG(rb_control_frame_t *, cfp, "15");
#define USE_MACHINE_REGS 1