From 9d8cc01b758f9385bd4c806f3daff9719e07faa0 Mon Sep 17 00:00:00 2001
From: Maxime Chevalier-Boisvert
Date: Tue, 9 Feb 2021 16:24:06 -0500
Subject: [PATCH] WIP JIT-to-JIT returns

---
 ujit_codegen.c  | 69 ++++++++++++++++++++++++++++++++++++-------
 ujit_core.c     | 22 +++++++++-----
 ujit_core.h     |  5 +++-
 vm.c            |  2 +-
 vm_core.h       |  2 ++
 vm_insnhelper.c |  1 +
 6 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/ujit_codegen.c b/ujit_codegen.c
index b9d2a0a7cb..4a2a4c865e 100644
--- a/ujit_codegen.c
+++ b/ujit_codegen.c
@@ -114,7 +114,7 @@
 Compile an interpreter entry block to be inserted into an iseq
 Returns `NULL` if compilation fails.
 */
 uint8_t*
-ujit_entry_prologue()
+ujit_entry_prologue(void)
 {
     RUBY_ASSERT(cb != NULL);
@@ -248,9 +248,9 @@ gen_dup(jitstate_t* jit, ctx_t* ctx)
     x86opnd_t dup_val = ctx_stack_pop(ctx, 1);
     x86opnd_t loc0 = ctx_stack_push(ctx, T_NONE);
     x86opnd_t loc1 = ctx_stack_push(ctx, T_NONE);
-    mov(cb, RAX, dup_val);
-    mov(cb, loc0, RAX);
-    mov(cb, loc1, RAX);
+    mov(cb, REG0, dup_val);
+    mov(cb, loc0, REG0);
+    mov(cb, loc1, REG0);
 
     return true;
 }
@@ -1191,6 +1191,23 @@ gen_opt_swb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const r
 
 bool rb_simple_iseq_p(const rb_iseq_t *iseq);
 
+void
+gen_return_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
+{
+    switch (shape)
+    {
+        case SHAPE_NEXT0:
+        case SHAPE_NEXT1:
+        RUBY_ASSERT(false);
+        break;
+
+        case SHAPE_DEFAULT:
+        mov(cb, REG0, const_ptr_opnd(target0));
+        mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
+        break;
+    }
+}
+
 static bool
 gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_callable_method_entry_t *cme, int32_t argc)
 {
@@ -1251,13 +1268,32 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
     cmp(cb, klass_opnd, REG1);
     jne_ptr(cb, side_exit);
 
-    // Store incremented PC into current control frame in case callee raises.
+    // Store the updated SP on the current frame (pop arguments and receiver)
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
+    mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+
+    // Store the next PC in the current frame
     mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block))));
     mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0);
 
-    // Store the updated SP on the CFP (pop arguments and receiver)
-    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
-    mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+    // Stub so we can return to JITted code
+    blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
+
+    // Pop arguments and receiver in return context, push the return value
+    // After the return, the JIT and interpreter SP will match up
+    ctx_t return_ctx = *ctx;
+    ctx_stack_pop(&return_ctx, argc);
+    return_ctx.sp_offset = 0;
+
+    // Write the JIT return address on the current frame
+    gen_branch(
+        ctx,
+        return_block,
+        &return_ctx,
+        return_block,
+        &return_ctx,
+        gen_return_branch
+    );
 
     // Stack overflow check
     // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
@@ -1327,7 +1363,6 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
         &DEFAULT_CTX,
         (blockid_t){ iseq, 0 }
     );
 
-    // TODO: create stub for call continuation
 
 
@@ -1432,7 +1467,21 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
     mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
     mov(cb, mem_opnd(64, REG_SP, -SIZEOF_VALUE), REG0);
 
-    // Write the post call bytes
+    // Load the JIT return address
+    mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, jit_return));
+
+    // If the return address is NULL, fall back to the interpreter
+    int FALLBACK_LABEL = cb_new_label(cb, "FALLBACK");
+    cmp(cb, REG0, imm_opnd(0));
+    jz(cb, FALLBACK_LABEL);
+
+    // Jump to the JIT return address
+    jmp_rm(cb, REG0);
+
+    // Fall back to the interpreter
+    cb_write_label(cb, FALLBACK_LABEL);
+    cb_link_labels(cb);
+
     cb_write_post_call_bytes(cb);
 
     return true;
diff --git a/ujit_core.c b/ujit_core.c
index d7b0d65e69..105769955c 100644
--- a/ujit_core.c
+++ b/ujit_core.c
@@ -32,7 +32,7 @@ Get an operand for the adjusted stack pointer address
 x86opnd_t ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes)
 {
-    int32_t offset = (ctx->stack_size) * sizeof(VALUE) + offset_bytes;
+    int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
     return mem_opnd(64, REG_SP, offset);
 }
 
 
@@ -49,9 +49,10 @@ ctx_stack_push(ctx_t* ctx, int type)
     ctx->temp_types[ctx->stack_size] = type;
 
     ctx->stack_size += 1;
+    ctx->sp_offset += 1;
 
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
     return mem_opnd(64, REG_SP, offset);
 }
 
@@ -65,7 +66,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
     RUBY_ASSERT(n <= ctx->stack_size);
 
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
     x86opnd_t top = mem_opnd(64, REG_SP, offset);
 
     // Clear the types of the popped values
@@ -77,6 +78,7 @@
     }
 
     ctx->stack_size -= n;
+    ctx->sp_offset -= n;
 
     return top;
 }
@@ -88,7 +90,7 @@
 x86opnd_t ctx_stack_opnd(ctx_t* ctx, int32_t idx)
 {
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1 - idx) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
     x86opnd_t opnd = mem_opnd(64, REG_SP, offset);
 
     return opnd;
@@ -120,6 +122,9 @@ int ctx_diff(const ctx_t* src, const ctx_t* dst)
     if (dst->stack_size != src->stack_size)
         return INT_MAX;
 
+    if (dst->sp_offset != src->sp_offset)
+        return INT_MAX;
+
     if (dst->self_is_object != src->self_is_object)
         return INT_MAX;
 
@@ -345,6 +350,7 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
         // Limit the number of block versions
        ctx_t generic_ctx = DEFAULT_CTX;
         generic_ctx.stack_size = target_ctx->stack_size;
+        generic_ctx.sp_offset = target_ctx->sp_offset;
         if (count_block_versions(target) >= MAX_VERSIONS - 1)
         {
             fprintf(stderr, "version limit hit in branch_stub_hit\n");
@@ -383,7 +389,6 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
 }
 
 // Get a version or stub corresponding to a branch target
-// TODO: need incoming and target contexts
 uint8_t* get_branch_target(
     blockid_t target,
     const ctx_t* ctx,
@@ -440,13 +445,13 @@ void gen_branch(
 )
 {
     RUBY_ASSERT(target0.iseq != NULL);
-    RUBY_ASSERT(target1.iseq != NULL);
+    //RUBY_ASSERT(target1.iseq != NULL);
     RUBY_ASSERT(num_branches < MAX_BRANCHES);
     uint32_t branch_idx = num_branches++;
 
     // Get the branch targets or stubs
     uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0);
-    uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1);
+    uint8_t* dst_addr1 = ctx1 ? get_branch_target(target1, ctx1, branch_idx, 1) : NULL;
 
     // Call the branch generation function
     uint32_t start_pos = cb->write_pos;
@@ -459,7 +464,7 @@ void gen_branch(
         end_pos,
         *src_ctx,
         { target0, target1 },
-        { *ctx0, *ctx1 },
+        { *ctx0, ctx1 ? *ctx1 : DEFAULT_CTX },
         { dst_addr0, dst_addr1 },
         gen_fn,
         SHAPE_DEFAULT
@@ -508,6 +513,7 @@ void gen_direct_jump(
         // Limit the number of block versions
         ctx_t generic_ctx = DEFAULT_CTX;
         generic_ctx.stack_size = ctx->stack_size;
+        generic_ctx.sp_offset = ctx->sp_offset;
         if (count_block_versions(target0) >= MAX_VERSIONS - 1)
         {
             fprintf(stderr, "version limit hit in branch_stub_hit\n");
diff --git a/ujit_core.h b/ujit_core.h
index 08fdd4d779..9430269438 100644
--- a/ujit_core.h
+++ b/ujit_core.h
@@ -31,9 +31,12 @@ typedef struct CtxStruct
     // T_NONE==0 is the unknown type
     uint8_t temp_types[MAX_TEMP_TYPES];
 
-    // Number of values pushed on the temporary stack
+    // Number of values currently on the temporary stack
     uint16_t stack_size;
 
+    // Offset of the JIT SP relative to the interpreter SP
+    int16_t sp_offset;
+
     // Whether we know self is a heap object
     bool self_is_object : 1;
 
diff --git a/vm.c b/vm.c
index e98f89864b..86a78bc8d5 100644
--- a/vm.c
+++ b/vm.c
@@ -202,7 +202,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
 {
     rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
     VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
-    VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
+    VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
     return cfp;
 }
 
diff --git a/vm_core.h b/vm_core.h
index 6b627f4a82..23ebf37b1a 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -790,6 +790,8 @@ typedef struct rb_control_frame_struct {
 #if VM_DEBUG_BP_CHECK
     VALUE *bp_check; /* cfp[7] */
 #endif
+    // Return address for uJIT code
+    void *jit_return;
 } rb_control_frame_t;
 
 extern const rb_data_type_t ruby_threadptr_data_type;
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 7075f7c0f2..f743e07e03 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -390,6 +390,7 @@ vm_push_frame(rb_execution_context_t *ec,
 #if VM_DEBUG_BP_CHECK
         .bp_check = sp,
 #endif
+        .jit_return = NULL
     };
 
     ec->cfp = cfp;
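
A note on the mechanism, with a minimal C sketch. This is not part of the
patch: frame_t, record_jit_return and leave_path are hypothetical stand-ins
for the machine code emitted by gen_return_branch and gen_leave, but the
NULL check and the two exit paths mirror the generated logic above.

    #include <stdio.h>
    #include <stddef.h>

    // Stand-in for rb_control_frame_t with the new field
    typedef struct frame {
        void *jit_return;  // NULL means "return through the interpreter"
    } frame_t;

    // Caller side (modeled on gen_return_branch): before control enters
    // the callee, record the address of the compiled code that follows
    // the send.
    static void record_jit_return(frame_t *cfp, void *return_code) {
        cfp->jit_return = return_code;
    }

    // Callee side (modeled on gen_leave): after the return value is
    // written, jump to the recorded address, or fall back to the
    // interpreter when none was recorded (e.g. the frame was pushed by
    // the interpreter itself).
    static const char *leave_path(const frame_t *cfp) {
        if (cfp->jit_return != NULL)
            return "jmp_rm(cb, REG0)";          // JIT-to-JIT return
        return "cb_write_post_call_bytes(cb)";  // interpreter path
    }

    int main(void) {
        frame_t cfp = { .jit_return = NULL };
        printf("%s\n", leave_path(&cfp));         // interpreter path
        record_jit_return(&cfp, (void *)0x1234);  // hypothetical stub address
        printf("%s\n", leave_path(&cfp));         // JIT-to-JIT path
        return 0;
    }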
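
The ctx_t changes are what make the return context line up: stack_size keeps
counting temporaries while the new sp_offset tracks how far the JIT's SP
register has drifted from the interpreter's cfp->sp. A small self-contained
model of that bookkeeping (mini_ctx_t, push_slot and model_return_point are
illustrative, not patch API; 8 stands in for sizeof(VALUE)):

    #include <assert.h>
    #include <stdint.h>

    typedef struct {
        uint16_t stack_size; // values tracked on the temp stack
        int16_t sp_offset;   // JIT SP relative to interpreter SP, in slots
    } mini_ctx_t;

    // Mirrors ctx_stack_push: both counters advance together in
    // straight-line code.
    static int32_t push_slot(mini_ctx_t *ctx) {
        ctx->stack_size += 1;
        ctx->sp_offset += 1;
        return (ctx->sp_offset - 1) * 8; // byte offset of the new top slot
    }

    // Mirrors the return_ctx set up in gen_opt_swb_iseq: the arguments are
    // popped, and sp_offset is reset to 0 because cfp->sp was synced just
    // before the call; the receiver slot will hold the return value.
    static void model_return_point(mini_ctx_t *ctx, int argc) {
        ctx->stack_size -= argc;
        ctx->sp_offset = 0;
    }

    int main(void) {
        mini_ctx_t ctx = {0, 0};
        push_slot(&ctx);             // receiver
        push_slot(&ctx);             // one argument
        model_return_point(&ctx, 1);
        assert(ctx.stack_size == 1 && ctx.sp_offset == 0);
        return 0;
    }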