зеркало из https://github.com/github/ruby.git
Tune codegen for rb_yield() calls landing in ISeqs
Unlike with revisions from earlier in the year, GCC 11 isn't inlining the call to vm_push_frame() inside invoke_iseq_block_from_c() anymore. We do want it to be inlined since rb_yield() speed is fairly important. Logs from -fopt-info-optimized-inline reveal that GCC was blowing its code size budget inlining invoke_block_from_c_bh() into its various callers, leaving suboptimal code for its body.

Take away some uses of the `inline` keyword and merge a common tail call to vm_exec() for overall better code.

This tweak gives about an 18% speedup on a micro benchmark and 1% on the chunky-png benchmark from yjit-bench. I tested on a Skylake server.

```
$ cat c-to-ruby-call.yml
benchmark:
  - 0.upto(10_000_000) {}

$ benchmark-driver --chruby '+patch;master' c-to-ruby-call.yml
Warming up --------------------------------------
0.upto(10_000_000) {}      2.299 i/s -       3.000 times in 1.304689s (434.90ms/i)
Calculating -------------------------------------
                          +patch      master
0.upto(10_000_000) {}      2.299       1.943 i/s -       6.000 times in 2.609393s 3.088353s

Comparison:
             0.upto(10_000_000) {}
              +patch:         2.3 i/s
              master:         1.9 i/s - 1.18x slower

$ ruby run_benchmarks.rb --chruby 'master;+patch' chunky-png
<snip>
----------  -----------  ----------  -----------  ----------  --------------  -------------
bench       master (ms)  stddev (%)  +patch (ms)  stddev (%)  +patch 1st itr  master/+patch
chunky-png  1156.1       0.1         1142.2       0.2         1.01            1.01
----------  -----------  ----------  -----------  ----------  --------------  -------------
```
This commit is contained in:
Родитель
e271feb866
Коммит
34715bdd91
17
vm.c
17
vm.c
|
@ -1509,7 +1509,7 @@ rb_binding_add_dynavars(VALUE bindval, rb_binding_t *bind, int dyncount, const I
|
|||
|
||||
/* C -> Ruby: block */
|
||||
|
||||
static inline VALUE
|
||||
static inline void
|
||||
invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_cref_t *cref, VALUE type, int opt_pc)
|
||||
{
|
||||
int arg_size = ISEQ_BODY(iseq)->param.size;
|
||||
|
@ -1521,15 +1521,13 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons
|
|||
ec->cfp->sp + arg_size,
|
||||
ISEQ_BODY(iseq)->local_table_size - arg_size,
|
||||
ISEQ_BODY(iseq)->stack_max);
|
||||
return vm_exec(ec);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
static inline void
|
||||
invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc)
|
||||
{
|
||||
/* bmethod call from outside the VM */
|
||||
int arg_size = ISEQ_BODY(iseq)->param.size;
|
||||
VALUE ret;
|
||||
|
||||
VM_ASSERT(me->def->type == VM_METHOD_TYPE_BMETHOD);
|
||||
|
||||
|
@ -1542,9 +1540,6 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co
|
|||
ISEQ_BODY(iseq)->stack_max);
|
||||
|
||||
VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);
|
||||
ret = vm_exec(ec);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE(static VALUE
|
||||
|
@ -1591,14 +1586,16 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
|
|||
cfp->sp = sp;
|
||||
|
||||
if (me == NULL) {
|
||||
return invoke_block(ec, iseq, self, captured, cref, type, opt_pc);
|
||||
invoke_block(ec, iseq, self, captured, cref, type, opt_pc);
|
||||
}
|
||||
else {
|
||||
return invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc);
|
||||
invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc);
|
||||
}
|
||||
|
||||
return vm_exec(ec);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
static VALUE
|
||||
invoke_block_from_c_bh(rb_execution_context_t *ec, VALUE block_handler,
|
||||
int argc, const VALUE *argv,
|
||||
int kw_splat, VALUE passed_block_handler, const rb_cref_t *cref,
|
||||
|
|
|
@ -3751,7 +3751,7 @@ vm_method_cfunc_entry(const rb_callable_method_entry_t *me)
|
|||
return UNALIGNED_MEMBER_PTR(me->def, body.cfunc);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
static VALUE
|
||||
vm_call_cfunc_with_frame_(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling,
|
||||
int argc, VALUE *argv, VALUE *stack_bottom)
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче