Enable arm64 optimizations that exist for power/x86 (#3393)

* Enable unaligned accesses on arm64

64-bit Arm platforms support unaligned accesses.

Running the string benchmarks this change improves performance
by an average of 1.04x, min .96x, max 1.21x, median 1.01x

* arm64 enable gc optimizations

Similar to x86 and powerpc optimizations.

|       |compare-ruby|built-ruby|
|:------|-----------:|---------:|
|hash1  |       0.225|     0.237|
|       |           -|     1.05x|
|hash2  |       0.110|     0.110|
|       |       1.00x|         -|

* vm_exec.c: improve performance for arm64

|                               |compare-ruby|built-ruby|
|:------------------------------|-----------:|---------:|
|vm_array                       |     26.501M|   27.959M|
|                               |           -|     1.06x|
|vm_attr_ivar                   |     21.606M|   31.429M|
|                               |           -|     1.45x|
|vm_attr_ivar_set               |     21.178M|   26.113M|
|                               |           -|     1.23x|
|vm_backtrace                   |       6.621|     6.668|
|                               |           -|     1.01x|
|vm_bigarray                    |     26.205M|   29.958M|
|                               |           -|     1.14x|
|vm_bighash                     |    504.155k|  479.306k|
|                               |       1.05x|         -|
|vm_block                       |     16.692M|   21.315M|
|                               |           -|     1.28x|
|block_handler_type_iseq        |       5.083|     7.004|
|                               |           -|     1.38x|
This commit is contained in:
AGSaidi 2020-08-13 12:15:54 -05:00 коммит произвёл GitHub
Родитель 787cb0fd86
Коммит 511b55bcef
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 28 добавлений и 3 удалений

13
gc.c
Просмотреть файл

@ -1115,6 +1115,19 @@ tick(void)
return val;
}
#elif defined(__aarch64__) && defined(__GNUC__)
typedef unsigned long tick_t;
#define PRItick "lu"
static __inline__ tick_t
tick(void)
{
unsigned long val;
__asm__ __volatile__ ("mrs %0, cntvct_el0", : "=r" (val));
return val;
}
#elif defined(_WIN32) && defined(_MSC_VER)
#include <intrin.h>
typedef unsigned __int64 tick_t;

2
gc.h
Просмотреть файл

@ -8,6 +8,8 @@
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("movl\t%%esp, %0" : "=r" (*(p)))
#elif defined(__powerpc64__) && defined(__GNUC__)
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mr\t%0, %%r1" : "=r" (*(p)))
#elif defined(__aarch64__) && defined(__GNUC__)
#define SET_MACHINE_STACK_END(p) __asm__ __volatile__ ("mov\t%0, sp" : "=r" (*(p)))
#else
NOINLINE(void rb_gc_set_stack_end(VALUE **stack_end_p));
#define SET_MACHINE_STACK_END(p) rb_gc_set_stack_end(p)

Просмотреть файл

@ -103,6 +103,8 @@
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__powerpc64__)
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__aarch64__)
# define UNALIGNED_WORD_ACCESS 1
#elif defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
#else

Просмотреть файл

@ -52,7 +52,7 @@
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# else

Просмотреть файл

@ -30,7 +30,7 @@
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# endif

2
st.c
Просмотреть файл

@ -1662,7 +1662,7 @@ st_values_check(st_table *tab, st_data_t *values, st_index_t size,
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
defined(__powerpc64__) || \
defined(__powerpc64__) || defined(__aarch64__) || \
defined(__mc68020__)
# define UNALIGNED_WORD_ACCESS 1
# endif

Просмотреть файл

@ -57,6 +57,9 @@ static void vm_insns_counter_count_insn(int insn) {}
#elif defined(__GNUC__) && defined(__powerpc64__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg)
#elif defined(__GNUC__) && defined(__aarch64__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("x" reg)
#else
#define DECL_SC_REG(type, r, reg) register type reg_##r
#endif
@ -93,6 +96,11 @@ vm_exec_core(rb_execution_context_t *ec, VALUE initial)
DECL_SC_REG(rb_control_frame_t *, cfp, "15");
#define USE_MACHINE_REGS 1
#elif defined(__GNUC__) && defined(__aarch64__)
DECL_SC_REG(const VALUE *, pc, "19");
DECL_SC_REG(rb_control_frame_t *, cfp, "20");
#define USE_MACHINE_REGS 1
#else
register rb_control_frame_t *reg_cfp;
const VALUE *reg_pc;