Optimize cfunc calls for f(*a) and f(*a, **kw) if kw is empty

This optimizes the following calls:

* ~10-15% faster for f(*a) when a does not end with a flagged keywords hash
* ~10-15% faster for f(*a) when a ends with an empty flagged keywords hash
* ~35-40% faster for f(*a, **kw) if kw is empty

This still copies the array contents to the VM stack, but avoids some
overhead. It would be faster to use the array pointer directly,
but that could cause problems if the array was modified during
the call to the function. You could do that optimization for frozen
arrays, but as splatting frozen arrays is uncommon, and the speedup
is minimal (<5%), it doesn't seem worth it.

The vm_send_cfunc benchmark has been updated to test additional cfunc
call types, and the numbers above were taken from the benchmark results.
This commit is contained in:
Jeremy Evans 2023-04-01 09:19:35 -07:00
Родитель f6254f77f7
Коммит af2da6419a
2 изменённых файлов: 96 добавлений и 4 удалений

Просмотреть файл

@ -1,3 +1,14 @@
prelude: |
ary = []
kw = {a: 1}
empty_kw = {}
kw_ary = [Hash.ruby2_keywords_hash(a: 1)]
empty_kw_ary = [Hash.ruby2_keywords_hash({})]
benchmark:
vm_send_cfunc: self.class
loop_count: 100000000
vm_send_cfunc: itself
vm_send_cfunc_splat: itself(*ary)
vm_send_cfunc_splat_kw_hash: equal?(*kw_ary)
vm_send_cfunc_splat_empty_kw_hash: itself(*empty_kw_ary)
vm_send_cfunc_splat_kw: equal?(*ary, **kw)
vm_send_cfunc_splat_empty_kw: itself(*ary, **empty_kw)
loop_count: 20000000

Просмотреть файл

@ -3462,10 +3462,10 @@ vm_call_cfunc_with_frame(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp
}
static VALUE
vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
vm_call_cfunc_other(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
{
const struct rb_callinfo *ci = calling->ci;
RB_DEBUG_COUNTER_INC(ccf_cfunc);
RB_DEBUG_COUNTER_INC(ccf_cfunc_other);
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV_KEEP_KWSPLAT);
VALUE argv_ary;
@ -3488,6 +3488,87 @@ vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb
}
}
/*
 * Copy the elements of a splatted array onto the VM stack and call the cfunc
 * through vm_call_cfunc_with_frame_().
 *
 * stack_offset: how many stack slots sit above the array (the array is read
 *               from sp[-1 - stack_offset]).
 * argc_offset:  how many trailing array elements are left out of the call.
 *
 * When the resulting argument count is larger than VM_ARGC_STACK_MAX the call
 * is handed to vm_call_cfunc_other() instead.
 */
static inline VALUE
vm_call_cfunc_array_argv(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int stack_offset, int argc_offset)
{
    VALUE splat_ary = reg_cfp->sp[-1 - stack_offset];
    int argc = RARRAY_LENINT(splat_ary) - argc_offset;

    if (UNLIKELY(argc > VM_ARGC_STACK_MAX)) {
        return vm_call_cfunc_other(ec, reg_cfp, calling);
    }

    VALUE *src = (VALUE *)RARRAY_CONST_PTR(splat_ary);

    // The visible callers only get here when there are no keywords to pass.
    calling->kw_splat = 0;

    // Slot just below the one that held the array; the copied arguments
    // start in the array's own slot and grow upwards from there.
    VALUE *base = reg_cfp->sp - 2 - stack_offset;
    VALUE *dst = base + 1;

    CHECK_VM_STACK_OVERFLOW(reg_cfp, argc);

    for (int n = 0; n < argc; n++) {
        *dst++ = src[n];
    }
    reg_cfp->sp = dst;

    return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, base + 1, base);
}
/*
 * Handler for f(*a): the only argument on the stack is the splatted array.
 *
 * If the array's last element is a hash flagged with RHASH_PASS_AS_KEYWORDS:
 *   - a non-empty hash sends the call to vm_call_cfunc_other();
 *   - an empty hash is dropped from the argument list.
 * The remaining elements are passed via vm_call_cfunc_array_argv().
 */
static inline VALUE
vm_call_cfunc_only_splat(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
{
    RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat);

    VALUE splat_ary = reg_cfp->sp[-1];
    int len = RARRAY_LENINT(splat_ary);
    VALUE *elts = (VALUE *)RARRAY_CONST_PTR(splat_ary);
    int drop_tail = 0;

    if (len > 0) {
        VALUE tail = elts[len - 1];

        if (UNLIKELY(RB_TYPE_P(tail, T_HASH) &&
                     (((struct RHash *)tail)->basic.flags & RHASH_PASS_AS_KEYWORDS))) {
            if (!RHASH_EMPTY_P(tail)) {
                // Flagged hash with entries: take the general path.
                return vm_call_cfunc_other(ec, reg_cfp, calling);
            }
            // Empty flagged hash: leave it out of the arguments.
            drop_tail = 1;
        }
    }

    return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 0, drop_tail);
}
/*
 * Handler for f(*a, **kw): the top of the stack is the keyword value and the
 * splatted array sits one slot below it.
 *
 * Only the case where the top of the stack is an empty hash is handled here
 * (by passing just the array's elements); every other case is forwarded to
 * vm_call_cfunc_other().
 */
static inline VALUE
vm_call_cfunc_only_splat_kw(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
{
    RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat_kw);

    VALUE kw = reg_cfp->sp[-1];

    if (!RB_TYPE_P(kw, T_HASH) || !RHASH_EMPTY_P(kw)) {
        return vm_call_cfunc_other(ec, reg_cfp, calling);
    }

    // One slot (the empty hash) lies above the array; no elements are dropped.
    return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 1, 0);
}
/*
 * Entry point for cfunc calls. Picks a handler from the shape of the call
 * site, registers it with CC_SET_FASTPATH(), and runs it for this call:
 *
 *   f(*a)        -> vm_call_cfunc_only_splat
 *   f(*a, **kw)  -> vm_call_cfunc_only_splat_kw
 *   anything else -> vm_call_cfunc_other
 */
static VALUE
vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
{
    const struct rb_callinfo *ci = calling->ci;
    RB_DEBUG_COUNTER_INC(ccf_cfunc);

    if (IS_ARGS_SPLAT(ci)) {
        if (IS_ARGS_KW_SPLAT(ci)) {
            if (vm_ci_argc(ci) == 2) {
                // f(*a, **kw)
                CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat_kw, TRUE);
                return vm_call_cfunc_only_splat_kw(ec, reg_cfp, calling);
            }
        }
        else if (vm_ci_argc(ci) == 1) {
            // f(*a)
            CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat, TRUE);
            return vm_call_cfunc_only_splat(ec, reg_cfp, calling);
        }
    }

    CC_SET_FASTPATH(calling->cc, vm_call_cfunc_other, TRUE);
    return vm_call_cfunc_other(ec, reg_cfp, calling);
}
static VALUE
vm_call_ivar(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
{