From 9b4bf02aa89fa9a9a568b7be045ab1df8053f0e6 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Sat, 1 Apr 2023 20:55:43 -0700 Subject: [PATCH] Optimize send calls Similar to the bmethod optimization, this avoids using CALLER_SETUP_ARG if not necessary. As long as the method argument can be shifted off, other arguments are passed through as-is. This optimizes the following types of calls: * send(meth, arg) ~5% * send(meth, *args) ~75% for args.length == 200 * send(meth, *args, **kw) ~50% for args.length == 200 * send(meth, **kw) ~25% * send(meth, kw: 1) ~115% Note that empty argument splats do get slower with this approach, by about 20%. This is probably because iseq argument setup is slower for empty argument splats than CALLER_SETUP_ARG is. Other than empty argument splats, argument splats are faster, with the speedup depending on the number of arguments. The following types of calls are not optimized: * send(*args) * send(*args, **kw) This is because you cannot shift the method argument off without first splatting the arg. 
--- benchmark/vm_call_send_iseq.yml | 77 ++++++++++++++++++++++ vm_insnhelper.c | 113 ++++++++++++++++++++++---------- 2 files changed, 154 insertions(+), 36 deletions(-) create mode 100644 benchmark/vm_call_send_iseq.yml diff --git a/benchmark/vm_call_send_iseq.yml b/benchmark/vm_call_send_iseq.yml new file mode 100644 index 0000000000..60ff23c475 --- /dev/null +++ b/benchmark/vm_call_send_iseq.yml @@ -0,0 +1,77 @@ +prelude: | + def a0; end + def a1(a) a; end + def s(*a) a; end + def b(kw: 1) kw end + def sb(*a, kw: 1) kw end + + t0 = 0.times.to_a + t1 = 1.times.to_a + t10 = 10.times.to_a + t200 = 200.times.to_a + + a0_t0 = [:a0, *t0] + a1_t1 = [:a1, *t1] + s_t0 = [:s, *t0] + s_t1 = [:s, *t1] + s_t10 = [:s, *t10] + s_t200 = [:s, *t200] + sb_t0 = [:sb, *t0] + sb_t1 = [:sb, *t1] + sb_t10 = [:sb, *t10] + sb_t200 = [:sb, *t200] + kw = {kw: 2} +benchmark: + send_simple_0: | + send(:a0) + send_simple_1: | + send(:a1, 1) + send_simple_0_splat: | + send(:a0, *t0) + send_simple_1_splat: | + send(:a1, *t1) + send_simple_0_splat_comb: | + send(*a0_t0) + send_simple_1_splat_comb: | + send(*a1_t1) + send_no_splat: | + send(:s) + send_0_splat: | + send(:s, *t0) + send_1_splat: | + send(:s, *t1) + send_10_splat: | + send(:s, *t10) + send_200_splat: | + send(:s, *t200) + send_0_splat_comb: | + send(*s_t0) + send_1_splat_comb: | + send(*s_t1) + send_10_splat_comb: | + send(*s_t10) + send_200_splat_comb: | + send(*s_t200) + send_kw: | + send(:b, kw: 1) + send_no_kw: | + send(:b) + send_kw_splat: | + send(:b, **kw) + send_0_splat_kw: | + send(:sb, *t0, **kw) + send_1_splat_kw: | + send(:sb, *t1, **kw) + send_10_splat_kw: | + send(:sb, *t10, **kw) + send_200_splat_kw: | + send(:sb, *t200, **kw) + send_0_splat_comb_kw: | + send(*sb_t0, **kw) + send_1_splat_comb_kw: | + send(*sb_t1, **kw) + send_10_splat_comb_kw: | + send(*sb_t10, **kw) + send_200_splat_comb_kw: | + send(*sb_t200, **kw) +loop_count: 3000000 diff --git a/vm_insnhelper.c b/vm_insnhelper.c index d66f1e833a..35f864e20d 
100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3813,14 +3813,13 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol, int flags) { ASSUME(calling->argc >= 0); - /* Also assumes CALLER_SETUP_ARG is already done. */ enum method_missing_reason missing_reason = MISSING_NOENTRY; int argc = calling->argc; VALUE recv = calling->recv; VALUE klass = CLASS_OF(recv); ID mid = rb_check_id(&symbol); - flags |= VM_CALL_OPT_SEND | (calling->kw_splat ? VM_CALL_KW_SPLAT : 0); + flags |= VM_CALL_OPT_SEND; if (UNLIKELY(! mid)) { mid = idMethodMissing; @@ -3910,14 +3909,52 @@ vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, } static VALUE -vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +vm_call_opt_send0(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int flags) { - RB_DEBUG_COUNTER_INC(ccf_opt_send); + const struct rb_callinfo *ci = calling->ci; + int i; + VALUE sym; - int i, flags = VM_CALL_FCALL; - VALUE sym, argv_ary; + i = calling->argc - 1; - CALLER_SETUP_ARG(reg_cfp, calling, calling->ci, ALLOW_HEAP_ARGV); + if (calling->argc == 0) { + rb_raise(rb_eArgError, "no method name given"); + } + + sym = TOPN(i); + /* E.g. 
when i == 2 + * + * | | | | TOPN + * +------+ | | + * | arg1 | ---+ | | 0 + * +------+ | +------+ + * | arg0 | -+ +-> | arg1 | 1 + * +------+ | +------+ + * | sym | +---> | arg0 | 2 + * +------+ +------+ + * | recv | | recv | 3 + * --+------+--------+------+------ + */ + /* shift arguments */ + if (i > 0) { + MEMMOVE(&TOPN(i), &TOPN(i-1), VALUE, i); + } + calling->argc -= 1; + DEC_SP(1); + + return vm_call_symbol(ec, reg_cfp, calling, ci, sym, flags); +} + +static VALUE +vm_call_opt_send_complex(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_opt_send_complex); + const struct rb_callinfo *ci = calling->ci; + int flags = VM_CALL_FCALL; + VALUE sym; + + VALUE argv_ary; + CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV); if (UNLIKELY(argv_ary = calling->heap_argv)) { sym = rb_ary_shift(argv_ary); flags |= VM_CALL_ARGS_SPLAT; @@ -3926,37 +3963,38 @@ vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct ((struct RHash *)last_hash)->basic.flags |= RHASH_PASS_AS_KEYWORDS; calling->kw_splat = 0; } - } - else { - i = calling->argc - 1; - - if (calling->argc == 0) { - rb_raise(rb_eArgError, "no method name given"); - } - - sym = TOPN(i); - /* E.g. 
when i == 2 - * - * | | | | TOPN - * +------+ | | - * | arg1 | ---+ | | 0 - * +------+ | +------+ - * | arg0 | -+ +-> | arg1 | 1 - * +------+ | +------+ - * | sym | +---> | arg0 | 2 - * +------+ +------+ - * | recv | | recv | 3 - * --+------+--------+------+------ - */ - /* shift arguments */ - if (i > 0) { - MEMMOVE(&TOPN(i), &TOPN(i-1), VALUE, i); - } - calling->argc -= 1; - DEC_SP(1); + return vm_call_symbol(ec, reg_cfp, calling, ci, sym, flags); } - return vm_call_symbol(ec, reg_cfp, calling, calling->ci, sym, flags); + if (calling->kw_splat) flags |= VM_CALL_KW_SPLAT; + return vm_call_opt_send0(ec, reg_cfp, calling, flags); +} + +static VALUE +vm_call_opt_send_simple(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_opt_send_simple); + return vm_call_opt_send0(ec, reg_cfp, calling, vm_ci_flag(calling->ci) | VM_CALL_FCALL); +} + +static VALUE +vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_opt_send); + + const struct rb_callinfo *ci = calling->ci; + int flags = vm_ci_flag(ci); + + if (UNLIKELY(!(flags & VM_CALL_ARGS_SIMPLE) && + ((calling->argc == 1 && (flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT))) || + (calling->argc == 2 && (flags & VM_CALL_ARGS_SPLAT) && (flags & VM_CALL_KW_SPLAT)) || + ((flags & VM_CALL_KWARG) && (vm_ci_kwarg(ci)->keyword_len == calling->argc))))) { + CC_SET_FASTPATH(calling->cc, vm_call_opt_send_complex, TRUE); + return vm_call_opt_send_complex(ec, reg_cfp, calling); + } + + CC_SET_FASTPATH(calling->cc, vm_call_opt_send_simple, TRUE); + return vm_call_opt_send_simple(ec, reg_cfp, calling); } static VALUE @@ -4819,6 +4857,9 @@ vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, } calling->recv = TOPN(--calling->argc); } + if (calling->kw_splat) { + flags |= VM_CALL_KW_SPLAT; + } return vm_call_symbol(ec, reg_cfp, calling, ci, symbol, flags); }