Remove __bp__ and speed-up bmethod calls (#8060)

Remove rb_control_frame_t::__bp__ and optimize bmethod calls

This commit removes the __bp__ field from rb_control_frame_t. It was
introduced to help MJIT, but since MJIT was replaced by RJIT, we can use
vm_base_ptr() to compute it from the SP of the previous control frame
instead. Removing the field avoids needing to set it up when pushing new
frames.

Simply removing __bp__ would cause crashes since RJIT and YJIT used a
slightly different stack layout for bmethod calls than the interpreter.
At the moment of the call, the two layouts looked as follows:

                   ┌────────────┐    ┌────────────┐
                   │ frame_base │    │ frame_base │
                   ├────────────┤    ├────────────┤
                   │    ...     │    │    ...     │
                   ├────────────┤    ├────────────┤
                   │    args    │    │    args    │
                   ├────────────┤    └────────────┘<─prev_frame_sp
                   │  receiver  │
    prev_frame_sp─>└────────────┘
                     RJIT & YJIT      interpreter

Essentially, vm_base_ptr() needs to compute the address to frame_base
given prev_frame_sp in the diagrams. The presence of the receiver
created an off-by-one situation.

Make the interpreter use the layout the JITs use for iseq-to-iseq
bmethod calls. Doing so removes unnecessary argument shifting and
vm_exec_core() re-entry from the interpreter, yielding a speed
improvement visible through `benchmark/vm_defined_method.yml`:

     patched:   7578743.1 i/s
      master:   4796596.3 i/s - 1.58x  slower

C-to-iseq bmethod calls now store one more VALUE than before, but that
should have negligible impact on overall performance.

Note that re-entering vm_exec_core() used to be necessary for firing
TracePoint events, but that's no longer the case since
9121e57a5f.

Closes ruby/ruby#6428
This commit is contained in:
Alan Wu 2023-07-17 13:57:58 -04:00 коммит произвёл GitHub
Родитель 105bdba899
Коммит f302e725e1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 38 добавлений и 36 удалений

Просмотреть файл

@ -5618,7 +5618,6 @@ module RubyVM::RJIT
sp_reg = iseq ? SP : :rax
asm.lea(sp_reg, [SP, C.VALUE.size * sp_offset])
asm.mov([CFP, cfp_offset + C.rb_control_frame_t.offsetof(:sp)], sp_reg)
asm.mov([CFP, cfp_offset + C.rb_control_frame_t.offsetof(:__bp__)], sp_reg) # TODO: get rid of this!!
# cfp->jit_return is used only for ISEQs
if iseq

Просмотреть файл

@ -1064,7 +1064,6 @@ module RubyVM::RJIT # :nodoc: all
self: [self.VALUE, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), self)")],
ep: [CType::Pointer.new { self.VALUE }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), ep)")],
block_code: [CType::Immediate.parse("void *"), Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), block_code)")],
__bp__: [CType::Pointer.new { self.VALUE }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), __bp__)")],
jit_return: [CType::Pointer.new { CType::Immediate.parse("void") }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), jit_return)")],
)
end

24
vm.c
Просмотреть файл

@ -198,7 +198,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
{
rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
return cfp;
}
@ -1398,7 +1398,7 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons
static VALUE
invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc)
{
/* bmethod */
/* bmethod call from outside the VM */
int arg_size = ISEQ_BODY(iseq)->param.size;
VALUE ret;
@ -1408,7 +1408,7 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co
VM_GUARDED_PREV_EP(captured->ep),
(VALUE)me,
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
ec->cfp->sp + arg_size,
ec->cfp->sp + 1 /* self */ + arg_size,
ISEQ_BODY(iseq)->local_table_size - arg_size,
ISEQ_BODY(iseq)->stack_max);
@ -1429,7 +1429,7 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
const rb_cref_t *cref, int is_lambda, const rb_callable_method_entry_t *me)
{
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
int i, opt_pc;
int opt_pc;
VALUE type = VM_FRAME_MAGIC_BLOCK | (is_lambda ? VM_FRAME_FLAG_LAMBDA : 0);
rb_control_frame_t *cfp = ec->cfp;
VALUE *sp = cfp->sp;
@ -1448,14 +1448,18 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
use_argv = vm_argv_ruby_array(av, argv, &flags, &argc, kw_splat);
}
CHECK_VM_STACK_OVERFLOW(cfp, argc);
CHECK_VM_STACK_OVERFLOW(cfp, argc + 1);
vm_check_canary(ec, sp);
cfp->sp = sp + argc;
for (i=0; i<argc; i++) {
sp[i] = use_argv[i];
}
opt_pc = vm_yield_setup_args(ec, iseq, argc, sp, flags, passed_block_handler,
VALUE *stack_argv = sp;
if (me) {
*sp = self; // bemthods need `self` on the VM stack
stack_argv++;
}
cfp->sp = stack_argv + argc;
MEMCPY(stack_argv, use_argv, VALUE, argc); // restrict: new stack space
opt_pc = vm_yield_setup_args(ec, iseq, argc, stack_argv, flags, passed_block_handler,
(is_lambda ? arg_setup_method : arg_setup_block));
cfp->sp = sp;

Просмотреть файл

@ -815,7 +815,6 @@ typedef struct rb_control_frame_struct {
VALUE self; /* cfp[3] / block[0] */
const VALUE *ep; /* cfp[4] / block[1] */
const void *block_code; /* cfp[5] / block[2] */ /* iseq or ifunc or forwarded block handler */
VALUE *__bp__; /* cfp[6] */ /* outside vm_push_frame, use vm_base_ptr instead. */
#if VM_DEBUG_BP_CHECK
VALUE *bp_check; /* cfp[7] */

Просмотреть файл

@ -281,14 +281,14 @@ rb_vmdebug_stack_dump_th(VALUE thval)
#if VMDEBUG > 2
/* copy from vm.c */
/* copy from vm_insnhelper.c */
static const VALUE *
vm_base_ptr(const rb_control_frame_t *cfp)
{
const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
const VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE;
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD) {
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) {
bp += 1;
}
return bp;

Просмотреть файл

@ -381,7 +381,6 @@ vm_push_frame(rb_execution_context_t *ec,
.self = self,
.ep = sp - 1,
.block_code = NULL,
.__bp__ = sp, /* Store initial value of ep as bp to skip calculation cost of bp on JIT cancellation. */
#if VM_DEBUG_BP_CHECK
.bp_check = sp,
#endif
@ -2455,15 +2454,15 @@ double_cmp_ge(double a, double b)
return RBOOL(a >= b);
}
// Copied by vm_dump.c
static inline VALUE *
vm_base_ptr(const rb_control_frame_t *cfp)
{
#if 0 // we may optimize and use this once we confirm it does not spoil performance on JIT.
const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
if (cfp->iseq && VM_FRAME_RUBYFRAME_P(cfp)) {
VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE;
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD) {
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) {
/* adjust `self' */
bp += 1;
}
@ -2480,9 +2479,6 @@ vm_base_ptr(const rb_control_frame_t *cfp)
else {
return NULL;
}
#else
return cfp->__bp__;
#endif
}
/* method call processes with call_info */
@ -3693,23 +3689,30 @@ vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct
const struct rb_captured_block *captured = &block->as.captured;
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
int i, opt_pc;
VALUE *sp = cfp->sp - calling->argc - 1;
for (i = 0; i < calling->argc; i++) {
sp[i] = sp[i+1];
}
VALUE * const argv = cfp->sp - calling->argc;
const int arg_size = ISEQ_BODY(iseq)->param.size;
int opt_pc;
if (vm_ci_flag(calling->ci) & VM_CALL_ARGS_SIMPLE) {
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, sp, arg_setup_method);
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, argv, arg_setup_method);
}
else {
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, sp, arg_setup_method);
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, argv, arg_setup_method);
}
cfp->sp = sp;
return invoke_bmethod(ec, iseq, calling->recv, captured, cme,
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_LAMBDA, opt_pc);
cfp->sp = argv - 1; // -1 for the receiver
vm_push_frame(ec, iseq,
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA,
calling->recv,
VM_GUARDED_PREV_EP(captured->ep),
(VALUE)cme,
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
argv + arg_size,
ISEQ_BODY(iseq)->local_table_size - arg_size,
ISEQ_BODY(iseq)->stack_max);
return Qundef;
}
static VALUE

Просмотреть файл

@ -4850,7 +4850,6 @@ fn gen_push_frame(
if let Some(pc) = frame.pc {
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into());
};
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BP), sp);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp);
let iseq: Opnd = if let Some(iseq) = frame.iseq {
VALUE::from(iseq).into()

Просмотреть файл

@ -718,9 +718,8 @@ mod manual_defs {
pub const RUBY_OFFSET_CFP_SELF: i32 = 24;
pub const RUBY_OFFSET_CFP_EP: i32 = 32;
pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40;
pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__
pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56;
pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64;
pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48;
pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56;
// Constants from rb_execution_context_t vm_core.h
pub const RUBY_OFFSET_EC_CFP: i32 = 16;