зеркало из https://github.com/github/ruby.git
Remove __bp__ and speed-up bmethod calls (#8060)
Remove rb_control_frame_t::__bp__ and optimize bmethod calls
This commit removes the __bp__ field from rb_control_frame_t. It was
introduced to help MJIT, but since MJIT was replaced by RJIT, we can use
vm_base_ptr() to compute it from the SP of the previous control frame
instead. Removing the field avoids needing to set it up when pushing new
frames.
Simply removing __bp__ would cause crashes since RJIT and YJIT used a
slightly different stack layout for bmethod calls than the interpreter.
At the moment of the call, the two layouts looked as follows:
┌────────────┐ ┌────────────┐
│ frame_base │ │ frame_base │
├────────────┤ ├────────────┤
│ ... │ │ ... │
├────────────┤ ├────────────┤
│ args │ │ args │
├────────────┤ └────────────┘<─prev_frame_sp
│ receiver │
prev_frame_sp─>└────────────┘
RJIT & YJIT interpreter
Essentially, vm_base_ptr() needs to compute the address to frame_base
given prev_frame_sp in the diagrams. The presence of the receiver
created an off-by-one situation.
Make the interpreter use the layout the JITs use for iseq-to-iseq
bmethod calls. Doing so removes unnecessary argument shifting and
vm_exec_core() re-entry from the interpreter, yielding a speed
improvement visible through `benchmark/vm_defined_method.yml`:
patched: 7578743.1 i/s
master: 4796596.3 i/s - 1.58x slower
C-to-iseq bmethod calls now store one more VALUE than before, but that
should have negligible impact on overall performance.
Note that re-entering vm_exec_core() used to be necessary for firing
TracePoint events, but that's no longer the case since
9121e57a5f
.
Closes ruby/ruby#6428
This commit is contained in:
Родитель
105bdba899
Коммит
f302e725e1
|
@ -5618,7 +5618,6 @@ module RubyVM::RJIT
|
|||
sp_reg = iseq ? SP : :rax
|
||||
asm.lea(sp_reg, [SP, C.VALUE.size * sp_offset])
|
||||
asm.mov([CFP, cfp_offset + C.rb_control_frame_t.offsetof(:sp)], sp_reg)
|
||||
asm.mov([CFP, cfp_offset + C.rb_control_frame_t.offsetof(:__bp__)], sp_reg) # TODO: get rid of this!!
|
||||
|
||||
# cfp->jit_return is used only for ISEQs
|
||||
if iseq
|
||||
|
|
|
@ -1064,7 +1064,6 @@ module RubyVM::RJIT # :nodoc: all
|
|||
self: [self.VALUE, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), self)")],
|
||||
ep: [CType::Pointer.new { self.VALUE }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), ep)")],
|
||||
block_code: [CType::Immediate.parse("void *"), Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), block_code)")],
|
||||
__bp__: [CType::Pointer.new { self.VALUE }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), __bp__)")],
|
||||
jit_return: [CType::Pointer.new { CType::Immediate.parse("void") }, Primitive.cexpr!("OFFSETOF((*((struct rb_control_frame_struct *)NULL)), jit_return)")],
|
||||
)
|
||||
end
|
||||
|
|
24
vm.c
24
vm.c
|
@ -198,7 +198,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
|
|||
{
|
||||
rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
|
||||
VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
|
||||
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
|
||||
VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
|
||||
return cfp;
|
||||
}
|
||||
|
||||
|
@ -1398,7 +1398,7 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons
|
|||
static VALUE
|
||||
invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc)
|
||||
{
|
||||
/* bmethod */
|
||||
/* bmethod call from outside the VM */
|
||||
int arg_size = ISEQ_BODY(iseq)->param.size;
|
||||
VALUE ret;
|
||||
|
||||
|
@ -1408,7 +1408,7 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co
|
|||
VM_GUARDED_PREV_EP(captured->ep),
|
||||
(VALUE)me,
|
||||
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
|
||||
ec->cfp->sp + arg_size,
|
||||
ec->cfp->sp + 1 /* self */ + arg_size,
|
||||
ISEQ_BODY(iseq)->local_table_size - arg_size,
|
||||
ISEQ_BODY(iseq)->stack_max);
|
||||
|
||||
|
@ -1429,7 +1429,7 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
|
|||
const rb_cref_t *cref, int is_lambda, const rb_callable_method_entry_t *me)
|
||||
{
|
||||
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
|
||||
int i, opt_pc;
|
||||
int opt_pc;
|
||||
VALUE type = VM_FRAME_MAGIC_BLOCK | (is_lambda ? VM_FRAME_FLAG_LAMBDA : 0);
|
||||
rb_control_frame_t *cfp = ec->cfp;
|
||||
VALUE *sp = cfp->sp;
|
||||
|
@ -1448,14 +1448,18 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
|
|||
use_argv = vm_argv_ruby_array(av, argv, &flags, &argc, kw_splat);
|
||||
}
|
||||
|
||||
CHECK_VM_STACK_OVERFLOW(cfp, argc);
|
||||
CHECK_VM_STACK_OVERFLOW(cfp, argc + 1);
|
||||
vm_check_canary(ec, sp);
|
||||
cfp->sp = sp + argc;
|
||||
for (i=0; i<argc; i++) {
|
||||
sp[i] = use_argv[i];
|
||||
}
|
||||
|
||||
opt_pc = vm_yield_setup_args(ec, iseq, argc, sp, flags, passed_block_handler,
|
||||
VALUE *stack_argv = sp;
|
||||
if (me) {
|
||||
*sp = self; // bemthods need `self` on the VM stack
|
||||
stack_argv++;
|
||||
}
|
||||
cfp->sp = stack_argv + argc;
|
||||
MEMCPY(stack_argv, use_argv, VALUE, argc); // restrict: new stack space
|
||||
|
||||
opt_pc = vm_yield_setup_args(ec, iseq, argc, stack_argv, flags, passed_block_handler,
|
||||
(is_lambda ? arg_setup_method : arg_setup_block));
|
||||
cfp->sp = sp;
|
||||
|
||||
|
|
|
@ -815,7 +815,6 @@ typedef struct rb_control_frame_struct {
|
|||
VALUE self; /* cfp[3] / block[0] */
|
||||
const VALUE *ep; /* cfp[4] / block[1] */
|
||||
const void *block_code; /* cfp[5] / block[2] */ /* iseq or ifunc or forwarded block handler */
|
||||
VALUE *__bp__; /* cfp[6] */ /* outside vm_push_frame, use vm_base_ptr instead. */
|
||||
|
||||
#if VM_DEBUG_BP_CHECK
|
||||
VALUE *bp_check; /* cfp[7] */
|
||||
|
|
|
@ -281,14 +281,14 @@ rb_vmdebug_stack_dump_th(VALUE thval)
|
|||
|
||||
#if VMDEBUG > 2
|
||||
|
||||
/* copy from vm.c */
|
||||
/* copy from vm_insnhelper.c */
|
||||
static const VALUE *
|
||||
vm_base_ptr(const rb_control_frame_t *cfp)
|
||||
{
|
||||
const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
||||
const VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE;
|
||||
|
||||
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD) {
|
||||
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) {
|
||||
bp += 1;
|
||||
}
|
||||
return bp;
|
||||
|
|
|
@ -381,7 +381,6 @@ vm_push_frame(rb_execution_context_t *ec,
|
|||
.self = self,
|
||||
.ep = sp - 1,
|
||||
.block_code = NULL,
|
||||
.__bp__ = sp, /* Store initial value of ep as bp to skip calculation cost of bp on JIT cancellation. */
|
||||
#if VM_DEBUG_BP_CHECK
|
||||
.bp_check = sp,
|
||||
#endif
|
||||
|
@ -2455,15 +2454,15 @@ double_cmp_ge(double a, double b)
|
|||
return RBOOL(a >= b);
|
||||
}
|
||||
|
||||
// Copied by vm_dump.c
|
||||
static inline VALUE *
|
||||
vm_base_ptr(const rb_control_frame_t *cfp)
|
||||
{
|
||||
#if 0 // we may optimize and use this once we confirm it does not spoil performance on JIT.
|
||||
const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
||||
|
||||
if (cfp->iseq && VM_FRAME_RUBYFRAME_P(cfp)) {
|
||||
VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE;
|
||||
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD) {
|
||||
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) {
|
||||
/* adjust `self' */
|
||||
bp += 1;
|
||||
}
|
||||
|
@ -2480,9 +2479,6 @@ vm_base_ptr(const rb_control_frame_t *cfp)
|
|||
else {
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
return cfp->__bp__;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* method call processes with call_info */
|
||||
|
@ -3693,23 +3689,30 @@ vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct
|
|||
|
||||
const struct rb_captured_block *captured = &block->as.captured;
|
||||
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
|
||||
int i, opt_pc;
|
||||
|
||||
VALUE *sp = cfp->sp - calling->argc - 1;
|
||||
for (i = 0; i < calling->argc; i++) {
|
||||
sp[i] = sp[i+1];
|
||||
}
|
||||
VALUE * const argv = cfp->sp - calling->argc;
|
||||
const int arg_size = ISEQ_BODY(iseq)->param.size;
|
||||
|
||||
int opt_pc;
|
||||
if (vm_ci_flag(calling->ci) & VM_CALL_ARGS_SIMPLE) {
|
||||
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, sp, arg_setup_method);
|
||||
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, argv, arg_setup_method);
|
||||
}
|
||||
else {
|
||||
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, sp, arg_setup_method);
|
||||
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, argv, arg_setup_method);
|
||||
}
|
||||
|
||||
cfp->sp = sp;
|
||||
return invoke_bmethod(ec, iseq, calling->recv, captured, cme,
|
||||
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_LAMBDA, opt_pc);
|
||||
cfp->sp = argv - 1; // -1 for the receiver
|
||||
|
||||
vm_push_frame(ec, iseq,
|
||||
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA,
|
||||
calling->recv,
|
||||
VM_GUARDED_PREV_EP(captured->ep),
|
||||
(VALUE)cme,
|
||||
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
|
||||
argv + arg_size,
|
||||
ISEQ_BODY(iseq)->local_table_size - arg_size,
|
||||
ISEQ_BODY(iseq)->stack_max);
|
||||
|
||||
return Qundef;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
|
|
|
@ -4850,7 +4850,6 @@ fn gen_push_frame(
|
|||
if let Some(pc) = frame.pc {
|
||||
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into());
|
||||
};
|
||||
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BP), sp);
|
||||
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp);
|
||||
let iseq: Opnd = if let Some(iseq) = frame.iseq {
|
||||
VALUE::from(iseq).into()
|
||||
|
|
|
@ -718,9 +718,8 @@ mod manual_defs {
|
|||
pub const RUBY_OFFSET_CFP_SELF: i32 = 24;
|
||||
pub const RUBY_OFFSET_CFP_EP: i32 = 32;
|
||||
pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40;
|
||||
pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__
|
||||
pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56;
|
||||
pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64;
|
||||
pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48;
|
||||
pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56;
|
||||
|
||||
// Constants from rb_execution_context_t vm_core.h
|
||||
pub const RUBY_OFFSET_EC_CFP: i32 = 16;
|
||||
|
|
Загрузка…
Ссылка в новой задаче