From ceebc7fc98d654e8d128e115bdca243c32711fa1 Mon Sep 17 00:00:00 2001
From: Alan Wu
Date: Tue, 27 Oct 2020 18:49:17 -0400
Subject: [PATCH] MicroJIT: generate less code for CFUNCs

Added UJIT_CHECK_MODE. Set to 1 to double-check method dispatch in
generated code.

It's surprising to me that we need to watch both cc and cme. There
might be opportunities to simplify there.
---
 gc.c           |   2 +-
 ujit_asm.c     |   2 +-
 ujit_asm.h     |   2 +-
 ujit_compile.c | 273 ++++++++++++++++++++++++++++++++++++++++++++-----
 ujit_compile.h |   2 +
 vm_callinfo.h  |  11 +-
 vm_method.c    |  15 +++
 7 files changed, 270 insertions(+), 37 deletions(-)

diff --git a/gc.c b/gc.c
index c97246a9cc..1a7e39485d 100644
--- a/gc.c
+++ b/gc.c
@@ -2564,7 +2564,7 @@ vm_ccs_free(struct rb_class_cc_entries *ccs, int alive, rb_objspace_t *objspace,
                 asan_poison_object((VALUE)cc);
             }
         }
-        vm_cc_invalidate(cc);
+        rb_vm_cc_invalidate(cc);
     }
     ruby_xfree(ccs->entries);
 }
diff --git a/ujit_asm.c b/ujit_asm.c
index 3ffc5503b0..4cb7bb7caa 100644
--- a/ujit_asm.c
+++ b/ujit_asm.c
@@ -73,7 +73,7 @@ x86opnd_t imm_opnd(int64_t imm)
     return opnd;
 }
 
-x86opnd_t const_ptr_opnd(void* ptr)
+x86opnd_t const_ptr_opnd(const void *ptr)
 {
     x86opnd_t opnd = {
         OPND_IMM,
diff --git a/ujit_asm.h b/ujit_asm.h
index cf8c72f30d..43b3665486 100644
--- a/ujit_asm.h
+++ b/ujit_asm.h
@@ -220,7 +220,7 @@ x86opnd_t mem_opnd(size_t num_bits, x86opnd_t base_reg, int32_t disp);
 x86opnd_t imm_opnd(int64_t val);
 
 // Constant pointer operand
-x86opnd_t const_ptr_opnd(void* ptr);
+x86opnd_t const_ptr_opnd(const void *ptr);
 
 // Struct member operand
 #define member_opnd(base_reg, struct_type, member_name) mem_opnd( \
diff --git a/ujit_compile.c b/ujit_compile.c
index b95feca3f9..3ef7b27db1 100644
--- a/ujit_compile.c
+++ b/ujit_compile.c
@@ -21,6 +21,14 @@
 #define PLATFORM_SUPPORTED_P 1
 #endif
 
+#ifndef UJIT_CHECK_MODE
+#define UJIT_CHECK_MODE 0
+#endif
+
+#ifndef UJIT_DUMP_MODE
+#define UJIT_DUMP_MODE 0
+#endif
+
 bool rb_ujit_enabled;
 
 // Hash table of encoded instructions
@@ -35,7 +43,12 @@ typedef struct ctx_struct
     // Difference between the current stack pointer and actual stack top
     int32_t stack_diff;
 
+    // The iseq that owns the region that is being compiled
     const rb_iseq_t *iseq;
+    // Index in the iseq to the opcode we are replacing
+    size_t replacement_idx;
+    // The start of output code
+    uint8_t *region_start;
 
 } ctx_t;
 
@@ -82,6 +95,137 @@ addr2insn_bookkeeping(void *code_ptr, int insn)
     }
 }
 
+// GC root for interacting with the GC
+struct ujit_root_struct {};
+
+// Map cme_or_cc => [[iseq, offset]]. An entry in the map means compiled code at iseq[offset]
+// is only valid when cme_or_cc is valid
+static st_table *method_lookup_dependency;
+
+struct compiled_region_array {
+    int32_t size;
+    int32_t capa;
+    struct compiled_region {
+        const rb_iseq_t *iseq;
+        size_t replacement_idx;
+        uint8_t *code;
+    } data[];
+};
+
+// Add an element to a region array, or allocate a new region array.
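+// Returns NULL when allocation fails or when the array cannot grow any further.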
+static struct compiled_region_array *
+add_compiled_region(struct compiled_region_array *array, const rb_iseq_t *iseq, size_t replacement_idx, uint8_t *code)
+{
+    if (!array) {
+        // Allocate a brand new array with space for one
+        array = malloc(sizeof(*array) + sizeof(struct compiled_region));
+        if (!array) {
+            return NULL;
+        }
+        array->size = 0;
+        array->capa = 1;
+    }
+    if (array->size == INT32_MAX) {
+        return NULL;
+    }
+    // Check if the region is already present
+    for (int32_t i = 0; i < array->size; i++) {
+        if (array->data[i].iseq == iseq && array->data[i].replacement_idx == replacement_idx) {
+            return array;
+        }
+    }
+    if (array->size + 1 > array->capa) {
+        // Double the array's capacity.
+        int64_t double_capa = ((int64_t)array->capa) * 2;
+        int32_t new_capa = (int32_t)double_capa;
+        if (new_capa != double_capa) {
+            return NULL;
+        }
+        array = realloc(array, sizeof(*array) + new_capa * sizeof(struct compiled_region));
+        if (array == NULL) {
+            return NULL;
+        }
+        array->capa = new_capa;
+    }
+
+    int32_t size = array->size;
+    array->data[size].iseq = iseq;
+    array->data[size].replacement_idx = replacement_idx;
+    array->data[size].code = code;
+    array->size++;
+    return array;
+}
+
+static int
+add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
+{
+    ctx_t *ctx = (ctx_t *)data;
+    struct compiled_region_array *regions = NULL;
+    if (existing) {
+        regions = (struct compiled_region_array *)*value;
+    }
+    regions = add_compiled_region(regions, ctx->iseq, ctx->replacement_idx, ctx->region_start);
+    if (!regions) {
+        rb_bug("ujit: failed to add method lookup dependency"); // TODO: we could bail out of compiling instead
+    }
+    *value = (st_data_t)regions;
+    return ST_CONTINUE;
+}
+
+// Store info to remember that the currently compiling region is only valid while cme and cc are valid.
+static void
+ujit_assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, ctx_t *ctx)
+{
+    st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)ctx);
+    st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)ctx);
+    // FIXME: This is a leak! When either the cme or the cc becomes invalid, the other also needs to go
+}
+
+static int
+ujit_root_mark_i(st_data_t k, st_data_t v, st_data_t ignore)
+{
+    // FIXME: This leaks everything that ends up in the dependency table!
+    // One way to deal with this is with weak references...
+    rb_gc_mark((VALUE)k);
+    struct compiled_region_array *regions = (void *)v;
+    for (int32_t i = 0; i < regions->size; i++) {
+        rb_gc_mark((VALUE)regions->data[i].iseq);
+    }
+
+    return ST_CONTINUE;
+}
+
+// GC callback during mark phase
+static void
+ujit_root_mark(void *ptr)
+{
+    if (method_lookup_dependency) {
+        st_foreach(method_lookup_dependency, ujit_root_mark_i, 0);
+    }
+}
+
+static void
+ujit_root_free(void *ptr)
+{
+    // Do nothing. The root lives as long as the process.
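+    // (Freeing it would also require freeing each compiled_region_array in
+    // method_lookup_dependency.)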
+}
+
+static size_t
+ujit_root_memsize(const void *ptr)
+{
+    // Count off-gc-heap allocation size of the dependency table
+    return st_memsize(method_lookup_dependency); // TODO: more accurate accounting
+}
+
+// Custom type for interacting with the GC
+// TODO: compaction support
+// TODO: make this write barrier protected
+static const rb_data_type_t ujit_root_type = {
+    "ujit_root",
+    {ujit_root_mark, ujit_root_free, ujit_root_memsize, },
+    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+};
+
 static int
 opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc)
 {
@@ -247,6 +391,8 @@ ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int* ne
     ctx.pc = NULL;
     ctx.stack_diff = 0;
     ctx.iseq = iseq;
+    ctx.region_start = code_ptr;
+    ctx.replacement_idx = insn_idx;
 
     // For each instruction to compile
     size_t num_instrs;
@@ -483,6 +629,27 @@ gen_opt_minus(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
     return true;
 }
 
+// Verify that calling with cd on receiver goes to callee
+static void
+check_cfunc_dispatch(VALUE receiver, struct rb_call_data *cd, void *callee, rb_callable_method_entry_t *compile_time_cme)
+{
+    if (METHOD_ENTRY_INVALIDATED(compile_time_cme)) {
+        rb_bug("ujit: output code uses invalidated cme %p", (void *)compile_time_cme);
+    }
+
+    bool callee_correct = false;
+    const rb_callable_method_entry_t *cme = rb_callable_method_entry(CLASS_OF(receiver), vm_ci_mid(cd->ci));
+    if (cme->def->type == VM_METHOD_TYPE_CFUNC) {
+        const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
+        if ((void *)cfunc->func == callee) {
+            callee_correct = true;
+        }
+    }
+    if (!callee_correct) {
+        rb_bug("ujit: output code calls wrong method cd->cc->klass: %p", (void *)cd->cc->klass);
+    }
+}
+
 MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key);
 
 bool
@@ -524,21 +691,24 @@ gen_opt_send_without_block(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
     }
 
     // Don't JIT if the inline cache is not set
-    if (cd->cc == vm_cc_empty())
-    {
-        //printf("call cache is empty\n");
+    if (!cd->cc || !cd->cc->klass) {
         return false;
     }
 
-    const rb_callable_method_entry_t *me = vm_cc_cme(cd->cc);
+    const rb_callable_method_entry_t *cme = vm_cc_cme(cd->cc);
+
+    // Don't JIT if the method entry is out of date
+    if (METHOD_ENTRY_INVALIDATED(cme)) {
+        return false;
+    }
 
     // Don't JIT if this is not a C call
-    if (me->def->type != VM_METHOD_TYPE_CFUNC)
+    if (cme->def->type != VM_METHOD_TYPE_CFUNC)
     {
         return false;
     }
 
-    const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(me->def, body.cfunc);
+    const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
 
     // Don't JIT if the argument count doesn't match
     if (cfunc->argc < 0 || cfunc->argc != argc)
@@ -586,24 +756,14 @@ gen_opt_send_without_block(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
     // Pointer to the klass field of the receiver &(recv->klass)
     x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
 
-    // Load the call cache pointer into REG1
-    mov(cb, REG1, const_ptr_opnd(cd));
-    x86opnd_t ptr_to_cc = member_opnd(REG1, struct rb_call_data, cc);
-    mov(cb, REG1, ptr_to_cc);
-
-    // Check the class of the receiver against the call cache
-    mov(cb, REG0, klass_opnd);
-    cmp(cb, REG0, mem_opnd(64, REG1, offsetof(struct rb_callcache, klass)));
+    // Bail if receiver class is different from compile-time call cache class
+    mov(cb, REG1, imm_opnd(cd->cc->klass));
+    cmp(cb, klass_opnd, REG1);
     jne_ptr(cb, side_exit);
 
-    // Check that the method entry is not invalidated
-    // cd->cc->cme->flags
-    // #define METHOD_ENTRY_INVALIDATED(me) ((me)->flags & IMEMO_FL_USER5)
-    x86opnd_t ptr_to_cme_ = mem_opnd(64, REG1, offsetof(struct rb_callcache, cme_));
-    mov(cb, REG1, ptr_to_cme_);
-    x86opnd_t flags_opnd = mem_opnd(64, REG1, offsetof(rb_callable_method_entry_t, flags));
-    test(cb, flags_opnd, imm_opnd(IMEMO_FL_USER5));
-    jnz_ptr(cb, side_exit);
+    // Store incremented PC into current control frame in case callee raises.
+    mov(cb, REG0, const_ptr_opnd(ctx->pc + insn_len(BIN(opt_send_without_block))));
+    mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0);
 
     // If this function needs a Ruby stack frame
     if (cfunc_needs_frame(cfunc))
@@ -619,6 +779,9 @@ gen_opt_send_without_block(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
         // sp += 3
         lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
 
+        // Put compile-time cme into REG1. It's assumed to be valid because we are notified when
+        // any cme we depend on becomes outdated. See rb_ujit_method_lookup_change().
+        mov(cb, REG1, const_ptr_opnd(cme));
         // Write method entry at sp[-3]
         // sp[-3] = me;
         mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
@@ -661,6 +824,29 @@ gen_opt_send_without_block(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
         mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
     }
 
+    if (UJIT_CHECK_MODE > 0) {
+        // Verify that we are calling the right function
+        // Save MicroJIT registers
+        push(cb, REG_CFP);
+        push(cb, REG_EC);
+        push(cb, REG_SP);
+        // Maintain 16-byte RSP alignment
+        sub(cb, RSP, imm_opnd(8));
+
+        // Call check_cfunc_dispatch
+        mov(cb, RDI, recv);
+        mov(cb, RSI, const_ptr_opnd(cd));
+        mov(cb, RDX, const_ptr_opnd((void *)cfunc->func));
+        mov(cb, RCX, const_ptr_opnd(cme));
+        call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
+
+        // Restore registers
+        add(cb, RSP, imm_opnd(8));
+        pop(cb, REG_SP);
+        pop(cb, REG_EC);
+        pop(cb, REG_CFP);
+    }
+
     // Save the MicroJIT registers
     push(cb, REG_CFP);
     push(cb, REG_EC);
@@ -687,8 +873,11 @@ gen_opt_send_without_block(codeblock_t* cb, codeblock_t* ocb, ctx_t* ctx)
 
     //print_str(cb, "before C call");
 
+    ujit_assume_method_lookup_stable(cd->cc, cme, ctx);
     // Call the C function
     // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
+    // cfunc comes from compile-time cme->def, which we assume to be stable.
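+    // The dependency is registered by the ujit_assume_method_lookup_stable() call above.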
+    // Invalidation logic is in rb_ujit_method_lookup_change()
     call_ptr(cb, REG0, (void*)cfunc->func);
 
     //print_str(cb, "after C call");
@@ -723,7 +912,7 @@ void
 rb_ujit_compile_iseq(const rb_iseq_t *iseq)
 {
 #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
-    RB_VM_LOCK();
+    RB_VM_LOCK_ENTER();
 
     VALUE *encoded = (VALUE *)iseq->body->iseq_encoded;
     unsigned int insn_idx;
@@ -745,10 +934,41 @@ rb_ujit_compile_iseq(const rb_iseq_t *iseq)
         }
         insn_idx += len;
     }
-    RB_VM_UNLOCK();
+    RB_VM_LOCK_LEAVE();
 #endif
 }
 
+// Callback when cme or cc becomes invalid
+void
+rb_ujit_method_lookup_change(VALUE cme_or_cc)
+{
+    if (!method_lookup_dependency) return;
+
+    RUBY_ASSERT(IMEMO_TYPE_P(cme_or_cc, imemo_ment) || IMEMO_TYPE_P(cme_or_cc, imemo_callcache));
+
+    st_data_t image;
+    if (st_lookup(method_lookup_dependency, (st_data_t)cme_or_cc, &image)) {
+        struct compiled_region_array *array = (void *)image;
+        // Invalidate all regions that depend on the cme or cc
+        for (int32_t i = 0; i < array->size; i++) {
+            struct compiled_region *region = &array->data[i];
+            const struct rb_iseq_constant_body *body = region->iseq->body;
+            RUBY_ASSERT((unsigned int)region->replacement_idx < body->iseq_size);
+            // Restore region address to interpreter address in bytecode sequence
+            if (body->iseq_encoded[region->replacement_idx] == (VALUE)region->code) {
+                const void *const *code_threading_table = rb_vm_get_insns_address_table();
+                int opcode = rb_vm_insn_addr2insn(region->code);
+                body->iseq_encoded[region->replacement_idx] = (VALUE)code_threading_table[opcode];
+                if (UJIT_DUMP_MODE > 0) {
+                    fprintf(stderr, "cc_or_cme=%p now out of date. Restored idx=%u in iseq=%p\n", (void *)cme_or_cc, (unsigned)region->replacement_idx, (void *)region->iseq);
+                }
+            }
+        }
+
+        array->size = 0;
+    }
+}
+
 void
 rb_ujit_init(void)
 {
@@ -783,4 +1003,9 @@ rb_ujit_init(void)
     st_insert(gen_fns, (st_data_t)BIN(setlocal_WC_0), (st_data_t)&gen_setlocal_wc0);
     st_insert(gen_fns, (st_data_t)BIN(opt_minus), (st_data_t)&gen_opt_minus);
     st_insert(gen_fns, (st_data_t)BIN(opt_send_without_block), (st_data_t)&gen_opt_send_without_block);
+
+    method_lookup_dependency = st_init_numtable();
+    struct ujit_root_struct *root;
+    VALUE ujit_root = TypedData_Make_Struct(0, struct ujit_root_struct, &ujit_root_type, root);
+    rb_gc_register_mark_object(ujit_root);
 }
diff --git a/ujit_compile.h b/ujit_compile.h
index 89ee85d938..4b2031d1a8 100644
--- a/ujit_compile.h
+++ b/ujit_compile.h
@@ -4,6 +4,7 @@
 #include "stddef.h"
 #include "stdint.h"
 #include "stdbool.h"
+#include "method.h"
 
 #ifndef rb_iseq_t
 typedef struct rb_iseq_struct rb_iseq_t;
@@ -22,6 +23,7 @@ bool rb_ujit_enabled_p(void)
 
 #define UJIT_CALL_THRESHOLD (10u)
 
+void rb_ujit_method_lookup_change(VALUE cme_or_cc);
 void rb_ujit_init(void);
 uint8_t *ujit_compile_insn(const rb_iseq_t *iseq, unsigned int insn_idx, unsigned int *next_ujit_idx);
 void rb_ujit_compile_iseq(const rb_iseq_t *iseq);
diff --git a/vm_callinfo.h b/vm_callinfo.h
index 03d9e2d2be..94b3bc3961 100644
--- a/vm_callinfo.h
+++ b/vm_callinfo.h
@@ -412,16 +412,7 @@ vm_cc_method_missing_reason_set(const struct rb_callcache *cc, enum method_missi
     *(enum method_missing_reason *)&cc->aux_.method_missing_reason = reason;
 }
 
-static inline void
-vm_cc_invalidate(const struct rb_callcache *cc)
-{
-    VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache));
-    VM_ASSERT(cc != vm_cc_empty());
-    VM_ASSERT(cc->klass != 0); // should be enable
-
-    *(VALUE *)&cc->klass = 0;
-    RB_DEBUG_COUNTER_INC(cc_ent_invalidate);
-}
+void rb_vm_cc_invalidate(const struct rb_callcache *cc);
 
 /* calldata */
diff --git a/vm_method.c b/vm_method.c
index e229d8b356..d63c7681ac 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -3,6 +3,7 @@
  */
 
 #include "id_table.h"
+#include "ujit_compile.h"
 
 #define METHOD_DEBUG 0
 
@@ -113,6 +114,19 @@ rb_vm_mtbl_dump(const char *msg, VALUE klass, ID target_mid)
     vm_mtbl_dump(klass, target_mid);
 }
 
+void
+rb_vm_cc_invalidate(const struct rb_callcache *cc)
+{
+    VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache));
+    VM_ASSERT(cc != vm_cc_empty());
+    VM_ASSERT(cc->klass != 0); // should be enable
+
+    *(VALUE *)&cc->klass = 0;
+    rb_ujit_method_lookup_change((VALUE)cc);
+    RB_DEBUG_COUNTER_INC(cc_ent_invalidate);
+}
+
+
 static inline void
 vm_me_invalidate_cache(rb_callable_method_entry_t *cme)
 {
@@ -120,6 +134,7 @@ vm_me_invalidate_cache(rb_callable_method_entry_t *cme)
     VM_ASSERT(callable_method_entry_p(cme));
     METHOD_ENTRY_INVALIDATED_SET(cme);
     RB_DEBUG_COUNTER_INC(cc_cme_invalidate);
+    rb_ujit_method_lookup_change((VALUE)cme);
 }
 
 void