зеркало из https://github.com/github/ruby.git
Optimize symproc calls
Similar to the bmethod/send optimization, this avoids using CALLER_SETUP_ARG if not necessary. As long as the receiver argument can be shifted off, other arguments are passed through as-is. This optimizes the following types of calls: * symproc.(recv) ~5% * symproc.(recv, *args) ~65% for args.length == 200 * symproc.(recv, *args, **kw) ~45% for args.length == 200 * symproc.(recv, **kw) ~30% * symproc.(recv, kw: 1) ~100% Note that empty argument splats do get slower with this approach, by about 2-3%. This is probably because iseq argument setup is slower for empty argument splats than CALLER_SETUP_ARG is. Other than empty argument splats, argument splats are faster, with the speedup depending on the number of arguments. The following types of calls are not optimized: * symproc.(*args) * symproc.(*args, **kw) This is because you cannot shift the receiver argument off without first splatting the arg.
This commit is contained in:
Родитель
9b4bf02aa8
Коммит
583e9d24d4
|
@ -0,0 +1,83 @@
|
|||
prelude: |
|
||||
def self.a0; end
|
||||
def self.a1(a) a; end
|
||||
def self.s(*a) a; end
|
||||
def self.b(kw: 1) kw end
|
||||
def self.sb(*a, kw: 1) kw end
|
||||
|
||||
t0 = 0.times.to_a
|
||||
t1 = 1.times.to_a
|
||||
t10 = 10.times.to_a
|
||||
t200 = 200.times.to_a
|
||||
|
||||
a0_t0 = [self, *t0]
|
||||
a1_t1 = [self, *t1]
|
||||
s_t0 = [self, *t0]
|
||||
s_t1 = [self, *t1]
|
||||
s_t10 = [self, *t10]
|
||||
s_t200 = [self, *t200]
|
||||
sb_t0 = [self, *t0]
|
||||
sb_t1 = [self, *t1]
|
||||
sb_t10 = [self, *t10]
|
||||
sb_t200 = [self, *t200]
|
||||
kw = {kw: 2}
|
||||
|
||||
a0 = :a0.to_proc
|
||||
a1 = :a1.to_proc
|
||||
s = :s.to_proc
|
||||
b = :b.to_proc
|
||||
sb = :sb.to_proc
|
||||
benchmark:
|
||||
symproc_simple_0: |
|
||||
a0.(self)
|
||||
symproc_simple_1: |
|
||||
a1.(self, 1)
|
||||
symproc_simple_0_splat: |
|
||||
a0.(self, *t0)
|
||||
symproc_simple_1_splat: |
|
||||
a1.(self, *t1)
|
||||
symproc_simple_0_splat_comb: |
|
||||
a0.(*a0_t0)
|
||||
symproc_simple_1_splat_comb: |
|
||||
a1.(*a1_t1)
|
||||
symproc_no_splat: |
|
||||
s.(self)
|
||||
symproc_0_splat: |
|
||||
s.(self, *t0)
|
||||
symproc_1_splat: |
|
||||
s.(self, *t1)
|
||||
symproc_10_splat: |
|
||||
s.(self, *t10)
|
||||
symproc_200_splat: |
|
||||
s.(self, *t200)
|
||||
symproc_0_splat_comb: |
|
||||
s.(*s_t0)
|
||||
symproc_1_splat_comb: |
|
||||
s.(*s_t1)
|
||||
symproc_10_splat_comb: |
|
||||
s.(*s_t10)
|
||||
symproc_200_splat_comb: |
|
||||
s.(*s_t200)
|
||||
symproc_kw: |
|
||||
b.(self, kw: 1)
|
||||
symproc_no_kw: |
|
||||
b.(self)
|
||||
symproc_kw_splat: |
|
||||
b.(self, **kw)
|
||||
symproc_0_splat_kw: |
|
||||
sb.(self, *t0, **kw)
|
||||
symproc_1_splat_kw: |
|
||||
sb.(self, *t1, **kw)
|
||||
symproc_10_splat_kw: |
|
||||
sb.(self, *t10, **kw)
|
||||
symproc_200_splat_kw: |
|
||||
sb.(self, *t200, **kw)
|
||||
symproc_0_splat_comb_kw: |
|
||||
sb.(*sb_t0, **kw)
|
||||
symproc_1_splat_comb_kw: |
|
||||
sb.(*sb_t1, **kw)
|
||||
symproc_10_splat_comb_kw: |
|
||||
sb.(*sb_t10, **kw)
|
||||
symproc_200_splat_comb_kw: |
|
||||
sb.(*sb_t200, **kw)
|
||||
loop_count: 1000000
|
|
@ -4836,20 +4836,37 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|||
MAYBE_UNUSED(bool is_lambda), VALUE block_handler)
|
||||
{
|
||||
VALUE symbol = VM_BH_TO_SYMBOL(block_handler);
|
||||
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV);
|
||||
int flags = 0;
|
||||
if (UNLIKELY(calling->heap_argv)) {
|
||||
int flags = vm_ci_flag(ci);
|
||||
|
||||
if (UNLIKELY(!(flags & VM_CALL_ARGS_SIMPLE) &&
|
||||
((calling->argc == 0) ||
|
||||
(calling->argc == 1 && (flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT))) ||
|
||||
(calling->argc == 2 && (flags & VM_CALL_ARGS_SPLAT) && (flags & VM_CALL_KW_SPLAT)) ||
|
||||
((flags & VM_CALL_KWARG) && (vm_ci_kwarg(ci)->keyword_len == calling->argc))))) {
|
||||
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV);
|
||||
flags = 0;
|
||||
if (UNLIKELY(calling->heap_argv)) {
|
||||
#if VM_ARGC_STACK_MAX < 0
|
||||
if (RARRAY_LEN(calling->heap_argv) < 1) {
|
||||
rb_raise(rb_eArgError, "no receiver given");
|
||||
}
|
||||
if (RARRAY_LEN(calling->heap_argv) < 1) {
|
||||
rb_raise(rb_eArgError, "no receiver given");
|
||||
}
|
||||
#endif
|
||||
calling->recv = rb_ary_shift(calling->heap_argv);
|
||||
// Modify stack to avoid cfp consistency error
|
||||
reg_cfp->sp++;
|
||||
reg_cfp->sp[-1] = reg_cfp->sp[-2];
|
||||
reg_cfp->sp[-2] = calling->recv;
|
||||
flags |= VM_CALL_ARGS_SPLAT;
|
||||
calling->recv = rb_ary_shift(calling->heap_argv);
|
||||
// Modify stack to avoid cfp consistency error
|
||||
reg_cfp->sp++;
|
||||
reg_cfp->sp[-1] = reg_cfp->sp[-2];
|
||||
reg_cfp->sp[-2] = calling->recv;
|
||||
flags |= VM_CALL_ARGS_SPLAT;
|
||||
}
|
||||
else {
|
||||
if (calling->argc < 1) {
|
||||
rb_raise(rb_eArgError, "no receiver given");
|
||||
}
|
||||
calling->recv = TOPN(--calling->argc);
|
||||
}
|
||||
if (calling->kw_splat) {
|
||||
flags |= VM_CALL_KW_SPLAT;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (calling->argc < 1) {
|
||||
|
@ -4857,9 +4874,7 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|||
}
|
||||
calling->recv = TOPN(--calling->argc);
|
||||
}
|
||||
if (calling->kw_splat) {
|
||||
flags |= VM_CALL_KW_SPLAT;
|
||||
}
|
||||
|
||||
return vm_call_symbol(ec, reg_cfp, calling, ci, symbol, flags);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче