diff --git a/yjit_codegen.c b/yjit_codegen.c
index d2b4e3ddbb..847bb57e0d 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -2253,23 +2253,18 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     //print_str(cb, "recv");
     //print_ptr(cb, recv);
 
-    // If this function needs a Ruby stack frame
-    const bool push_frame = cfunc_needs_frame(cfunc);
-
     // Create a size-exit to fall back to the interpreter
     uint8_t *side_exit = yjit_side_exit(jit, ctx);
 
     // Check for interrupts
     yjit_check_ints(cb, side_exit);
 
-    if (push_frame) {
-        // Stack overflow check
-        // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
-        // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
-        lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
-        cmp(cb, REG_CFP, REG0);
-        jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
-    }
+    // Stack overflow check
+    // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+    // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
+    cmp(cb, REG_CFP, REG0);
+    jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
 
     // Points to the receiver operand on the stack
     x86opnd_t recv = ctx_stack_opnd(ctx, argc);
@@ -2277,72 +2272,70 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     // Store incremented PC into current control frame in case callee raises.
     jit_save_pc(jit, REG0);
 
-    if (push_frame) {
-        if (block) {
-            // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
-            // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
-            // with cfp->block_code.
-            jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
-            mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
-        }
-
-        // Increment the stack pointer by 3 (in the callee)
-        // sp += 3
-        lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
-
-        // Write method entry at sp[-3]
-        // sp[-3] = me;
-        // Put compile time cme into REG1. It's assumed to be valid because we are notified when
-        // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
-        jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
-        mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
-
-        // Write block handler at sp[-2]
-        // sp[-2] = block_handler;
-        if (block) {
-            // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
-            lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
-            or(cb, REG1, imm_opnd(1));
-            mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
-        }
-        else {
-            mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
-        }
-
-        // Write env flags at sp[-1]
-        // sp[-1] = frame_type;
-        uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
-        mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
-
-        // Allocate a new CFP (ec->cfp--)
-        sub(
-            cb,
-            member_opnd(REG_EC, rb_execution_context_t, cfp),
-            imm_opnd(sizeof(rb_control_frame_t))
-        );
-
-        // Setup the new frame
-        // *cfp = (const struct rb_control_frame_struct) {
-        //    .pc = 0,
-        //    .sp = sp,
-        //    .iseq = 0,
-        //    .self = recv,
-        //    .ep = sp - 1,
-        //    .block_code = 0,
-        //    .__bp__ = sp,
-        // };
-        mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
-        mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
-        sub(cb, REG0, imm_opnd(sizeof(VALUE)));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
-        mov(cb, REG0, recv);
-        mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
+    if (block) {
+        // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+        // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+        // with cfp->block_code.
+        jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
+        mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
     }
 
+    // Increment the stack pointer by 3 (in the callee)
+    // sp += 3
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
+
+    // Write method entry at sp[-3]
+    // sp[-3] = me;
+    // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+    // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
+    jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
+    mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+
+    // Write block handler at sp[-2]
+    // sp[-2] = block_handler;
+    if (block) {
+        // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
+        lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
+        or(cb, REG1, imm_opnd(1));
+        mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+    }
+    else {
+        mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
+    }
+
+    // Write env flags at sp[-1]
+    // sp[-1] = frame_type;
+    uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
+    mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
+
+    // Allocate a new CFP (ec->cfp--)
+    sub(
+        cb,
+        member_opnd(REG_EC, rb_execution_context_t, cfp),
+        imm_opnd(sizeof(rb_control_frame_t))
+    );
+
+    // Setup the new frame
+    // *cfp = (const struct rb_control_frame_struct) {
+    //    .pc = 0,
+    //    .sp = sp,
+    //    .iseq = 0,
+    //    .self = recv,
+    //    .ep = sp - 1,
+    //    .block_code = 0,
+    //    .__bp__ = sp,
+    // };
+    mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
+    mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
+    sub(cb, REG0, imm_opnd(sizeof(VALUE)));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
+    mov(cb, REG0, recv);
+    mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
+
     // Verify that we are calling the right function
     if (YJIT_CHECK_MODE > 0) {
         // Save YJIT registers
@@ -2407,15 +2400,12 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
     x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
     mov(cb, stack_ret, RAX);
 
-    // If this function needs a Ruby stack frame
-    if (push_frame) {
-        // Pop the stack frame (ec->cfp++)
-        add(
-            cb,
-            member_opnd(REG_EC, rb_execution_context_t, cfp),
-            imm_opnd(sizeof(rb_control_frame_t))
-        );
-    }
+    // Pop the stack frame (ec->cfp++)
+    add(
+        cb,
+        member_opnd(REG_EC, rb_execution_context_t, cfp),
+        imm_opnd(sizeof(rb_control_frame_t))
+    );
 
     // Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
     // after the call, while this does not. This difference prevents
@@ -2463,6 +2453,30 @@ iseq_lead_only_arg_setup_p(const rb_iseq_t *iseq)
 bool rb_iseq_only_optparam_p(const rb_iseq_t *iseq);
 bool rb_iseq_only_kwparam_p(const rb_iseq_t *iseq);
 
+// If true, the iseq is leaf and it can be replaced by a single C call.
+static bool
+rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
+{
+    unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
+    unsigned int leave_len = insn_len(BIN(leave));
+
+    return (
+        iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
+        rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
+        rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
+        iseq->body->builtin_inline_p
+    );
+}
+
+// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
+static const struct rb_builtin_function*
+rb_leaf_builtin_function(const rb_iseq_t *iseq)
+{
+    if (!rb_leaf_invokebuiltin_iseq_p(iseq))
+        return NULL;
+    return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
+}
+
 static codegen_status_t
 gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc)
 {
@@ -2529,6 +2543,39 @@ gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const r
     // Check for interrupts
     yjit_check_ints(cb, side_exit);
 
+    const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
+
+    if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
+        // TODO: figure out if this is necessary
+        // If the calls don't allocate, do they need up to date PC, SP?
+        // Save YJIT registers
+        yjit_save_regs(cb);
+
+        // Get a pointer to the top of the stack
+        lea(cb, REG0, ctx_stack_opnd(ctx, 0));
+
+        // Call the builtin func (ec, recv, arg1, arg2, ...)
+        mov(cb, C_ARG_REGS[0], REG_EC);
+
+        // Copy self and arguments
+        for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
+            x86opnd_t stack_opnd = mem_opnd(64, REG0, -(leaf_builtin->argc - i) * SIZEOF_VALUE);
+            x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
+            mov(cb, c_arg_reg, stack_opnd);
+        }
+        ctx_stack_pop(ctx, leaf_builtin->argc + 1);
+        call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
+
+        // Load YJIT registers
+        yjit_load_regs(cb);
+
+        // Push the return value
+        x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
+        mov(cb, stack_ret, RAX);
+
+        return YJIT_KEEP_COMPILING;
+    }
+
     // Stack overflow check
     // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
     ADD_COMMENT(cb, "stack overflow check");
diff --git a/yjit_iface.c b/yjit_iface.c
index 01075ea1ee..9e1085a788 100644
--- a/yjit_iface.c
+++ b/yjit_iface.c
@@ -126,19 +126,6 @@ check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_ca
 
 MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key);
 
-bool
-cfunc_needs_frame(const rb_method_cfunc_t *cfunc)
-{
-    void* fptr = (void*)cfunc->func;
-
-    // Leaf C functions do not need a stack frame
-    // or a stack overflow check
-    return !(
-        // Hash#key?
-        fptr == (void*)rb_hash_has_key
-    );
-}
-
 // GC root for interacting with the GC
 struct yjit_root_struct {
     int unused; // empty structs are not legal in C99
diff --git a/yjit_iface.h b/yjit_iface.h
index faae80cf15..c1f28a5e65 100644
--- a/yjit_iface.h
+++ b/yjit_iface.h
@@ -100,7 +100,6 @@ VALUE *yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx);
 int yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
 
 void check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_callable_method_entry_t *compile_time_cme);
-bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc);
 RBIMPL_ATTR_NODISCARD() bool assume_bop_not_redefined(block_t *block, int redefined_flag, enum ruby_basic_operators bop);
 void assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry_t *cme, block_t *block);
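
For reference (not part of the patch): a minimal standalone C sketch of the offset arithmetic the leaf-builtin path uses when copying the receiver and arguments into C argument registers. REG0 holds the address of the top stack slot (the last argument), so slot i of {self, arg0, ...} sits at -(argc - i) * SIZEOF_VALUE bytes from it, and it goes into C_ARG_REGS[i + 1] because C_ARG_REGS[0] is reserved for ec. The argc value and slot names below are hypothetical.

#include <stdio.h>

#define SIZEOF_VALUE 8  /* size of a VALUE on x86-64 */

int main(void)
{
    /* Hypothetical leaf builtin taking 2 arguments: the VM stack holds
       [self, arg0, arg1], with arg1 in the top slot that REG0 points at. */
    int argc = 2;
    const char *slots[] = { "self", "arg0", "arg1" };

    for (int i = 0; i < argc + 1; i++) {
        /* Same arithmetic as the codegen loop: deeper slots get larger
           negative offsets from the top-of-stack pointer in REG0. */
        int offset = -(argc - i) * SIZEOF_VALUE;
        printf("C_ARG_REGS[%d] <- %s at [REG0%+d]\n", i + 1, slots[i], offset);
    }
    return 0;
}

Expected output: C_ARG_REGS[1] <- self at [REG0-16], C_ARG_REGS[2] <- arg0 at [REG0-8], C_ARG_REGS[3] <- arg1 at [REG0+0].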