2007-06-24 21:19:22 +04:00
|
|
|
/**********************************************************************
|
|
|
|
|
2008-11-14 14:31:10 +03:00
|
|
|
vm_insnhelper.c - instruction helper functions.
|
2007-06-24 21:19:22 +04:00
|
|
|
|
|
|
|
$Author$
|
|
|
|
|
|
|
|
Copyright (C) 2007 Koichi Sasada
|
|
|
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
2020-05-08 12:31:09 +03:00
|
|
|
#include "ruby/internal/config.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
|
2008-01-17 20:06:51 +03:00
|
|
|
#include <math.h>
|
2019-12-04 11:16:30 +03:00
|
|
|
|
2024-06-21 13:48:37 +03:00
|
|
|
#ifdef HAVE_STDATOMIC_H
|
|
|
|
#include <stdatomic.h>
|
|
|
|
#endif
|
|
|
|
|
2010-10-26 21:27:32 +04:00
|
|
|
#include "constant.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "debug_counter.h"
|
2011-05-18 17:41:54 +04:00
|
|
|
#include "internal.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "internal/class.h"
|
|
|
|
#include "internal/compar.h"
|
|
|
|
#include "internal/hash.h"
|
|
|
|
#include "internal/numeric.h"
|
|
|
|
#include "internal/proc.h"
|
|
|
|
#include "internal/random.h"
|
|
|
|
#include "internal/variable.h"
|
2021-11-18 05:01:31 +03:00
|
|
|
#include "internal/struct.h"
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
Each FL_EXIVAR object has its own ivar array, so the index can be cached
as for T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
#include "variable.h"
|
2008-01-17 20:06:51 +03:00
|
|
|
|
2019-12-04 11:16:30 +03:00
|
|
|
/* finish iseq array */
|
|
|
|
#include "insns.inc"
|
|
|
|
#include "insns_info.inc"
|
|
|
|
|
2019-10-03 06:26:41 +03:00
|
|
|
extern rb_method_definition_t *rb_method_definition_create(rb_method_type_t type, ID mid);
|
|
|
|
extern void rb_method_definition_set(const rb_method_entry_t *me, rb_method_definition_t *def, void *opts);
|
2018-12-28 04:06:04 +03:00
|
|
|
extern int rb_method_definition_eq(const rb_method_definition_t *d1, const rb_method_definition_t *d2);
|
|
|
|
extern VALUE rb_make_no_method_exception(VALUE exc, VALUE format, VALUE obj,
|
|
|
|
int argc, const VALUE *argv, int priv);
|
|
|
|
|
2020-05-29 10:04:53 +03:00
|
|
|
static const struct rb_callcache vm_empty_cc;
|
2021-11-16 11:57:49 +03:00
|
|
|
static const struct rb_callcache vm_empty_cc_for_super;
|
2020-06-05 04:14:06 +03:00
|
|
|
|
2007-06-24 21:19:22 +04:00
|
|
|
/* control stack frame */
|
|
|
|
|
2017-10-27 03:46:11 +03:00
|
|
|
static rb_control_frame_t *vm_get_ruby_level_caller_cfp(const rb_execution_context_t *ec, const rb_control_frame_t *cfp);
|
2009-01-19 06:03:09 +03:00
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
VALUE
|
2017-01-24 17:52:07 +03:00
|
|
|
ruby_vm_special_exception_copy(VALUE exc)
|
2012-12-25 13:57:07 +04:00
|
|
|
{
|
2017-01-24 17:52:07 +03:00
|
|
|
VALUE e = rb_obj_alloc(rb_class_real(RBASIC_CLASS(exc)));
|
|
|
|
rb_obj_copy_ivar(e, exc);
|
2014-06-28 08:58:25 +04:00
|
|
|
return e;
|
|
|
|
}
|
|
|
|
|
2017-10-27 04:13:35 +03:00
|
|
|
NORETURN(static void ec_stack_overflow(rb_execution_context_t *ec, int));
|
2017-04-17 05:08:41 +03:00
|
|
|
static void
|
2017-10-27 04:13:35 +03:00
|
|
|
ec_stack_overflow(rb_execution_context_t *ec, int setup)
|
2017-04-17 05:08:41 +03:00
|
|
|
{
|
2017-10-27 04:13:35 +03:00
|
|
|
VALUE mesg = rb_ec_vm_ptr(ec)->special_exceptions[ruby_error_sysstack];
|
|
|
|
ec->raised_flag = RAISED_STACKOVERFLOW;
|
2017-04-17 05:08:41 +03:00
|
|
|
if (setup) {
|
2017-10-28 16:22:04 +03:00
|
|
|
VALUE at = rb_ec_backtrace_object(ec);
|
2017-04-17 05:08:41 +03:00
|
|
|
mesg = ruby_vm_special_exception_copy(mesg);
|
|
|
|
rb_ivar_set(mesg, idBt, at);
|
|
|
|
rb_ivar_set(mesg, idBt_locations, at);
|
|
|
|
}
|
2017-10-27 04:13:35 +03:00
|
|
|
ec->errinfo = mesg;
|
|
|
|
EC_JUMP_TAG(ec, TAG_RAISE);
|
2017-04-17 05:08:41 +03:00
|
|
|
}
|
|
|
|
|
2018-01-18 12:44:48 +03:00
|
|
|
NORETURN(static void vm_stackoverflow(void));
|
|
|
|
|
2014-06-28 08:58:25 +04:00
|
|
|
static void
|
|
|
|
vm_stackoverflow(void)
|
|
|
|
{
|
2017-10-27 04:13:35 +03:00
|
|
|
ec_stack_overflow(GET_EC(), TRUE);
|
2012-12-25 13:57:07 +04:00
|
|
|
}
|
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
NORETURN(void rb_ec_stack_overflow(rb_execution_context_t *ec, int crit));
|
|
|
|
void
|
2017-10-27 04:13:35 +03:00
|
|
|
rb_ec_stack_overflow(rb_execution_context_t *ec, int crit)
|
2017-04-17 05:08:41 +03:00
|
|
|
{
|
2020-10-15 21:51:30 +03:00
|
|
|
if (rb_during_gc()) {
|
|
|
|
rb_bug("system stack overflow during GC. Faulty native extension?");
|
|
|
|
}
|
|
|
|
if (crit) {
|
2017-10-27 04:13:35 +03:00
|
|
|
ec->raised_flag = RAISED_STACKOVERFLOW;
|
|
|
|
ec->errinfo = rb_ec_vm_ptr(ec)->special_exceptions[ruby_error_stackfatal];
|
|
|
|
EC_JUMP_TAG(ec, TAG_RAISE);
|
2017-06-15 10:16:17 +03:00
|
|
|
}
|
2017-04-17 05:08:41 +03:00
|
|
|
#ifdef USE_SIGALTSTACK
|
2017-10-27 04:13:35 +03:00
|
|
|
ec_stack_overflow(ec, TRUE);
|
2017-04-17 05:08:41 +03:00
|
|
|
#else
|
2017-10-27 04:13:35 +03:00
|
|
|
ec_stack_overflow(ec, FALSE);
|
2017-04-17 05:08:41 +03:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2023-09-15 00:18:45 +03:00
|
|
|
static inline void stack_check(rb_execution_context_t *ec);
|
2017-04-17 05:08:41 +03:00
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
#if VM_CHECK_MODE > 0
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and both include M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searched and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to reconsider this field because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not take
defined_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
static int
|
|
|
|
callable_class_p(VALUE klass)
|
|
|
|
{
|
|
|
|
#if VM_CHECK_MODE >= 2
|
2016-10-09 12:42:17 +03:00
|
|
|
if (!klass) return FALSE;
|
|
|
|
switch (RB_BUILTIN_TYPE(klass)) {
|
2020-05-02 18:15:56 +03:00
|
|
|
default:
|
|
|
|
break;
|
2016-10-09 12:42:17 +03:00
|
|
|
case T_ICLASS:
|
|
|
|
if (!RB_TYPE_P(RCLASS_SUPER(klass), T_MODULE)) break;
|
|
|
|
case T_MODULE:
|
|
|
|
return TRUE;
|
|
|
|
}
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
while (klass) {
|
|
|
|
if (klass == rb_cBasicObject) {
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
klass = RCLASS_SUPER(klass);
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
#else
|
|
|
|
return klass != 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2020-01-08 10:14:01 +03:00
|
|
|
callable_method_entry_p(const rb_callable_method_entry_t *cme)
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
{
|
2020-02-27 04:47:23 +03:00
|
|
|
if (cme == NULL) {
|
|
|
|
return TRUE;
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-02-27 04:47:23 +03:00
|
|
|
VM_ASSERT(IMEMO_TYPE_P((VALUE)cme, imemo_ment));
|
|
|
|
|
|
|
|
if (callable_class_p(cme->defined_class)) {
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return FALSE;
|
|
|
|
}
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
static void
|
2016-08-03 03:16:34 +03:00
|
|
|
vm_check_frame_detail(VALUE type, int req_block, int req_me, int req_cref, VALUE specval, VALUE cref_or_me, int is_cframe, const rb_iseq_t *iseq)
|
2015-06-02 07:20:30 +03:00
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
unsigned int magic = (unsigned int)(type & VM_FRAME_MAGIC_MASK);
|
2016-07-28 22:13:26 +03:00
|
|
|
enum imemo_type cref_or_me_type = imemo_env; /* impossible value */
|
2015-06-11 00:56:23 +03:00
|
|
|
|
|
|
|
if (RB_TYPE_P(cref_or_me, T_IMEMO)) {
|
|
|
|
cref_or_me_type = imemo_type(cref_or_me);
|
|
|
|
}
|
2015-10-30 00:14:29 +03:00
|
|
|
if (type & VM_FRAME_FLAG_BMETHOD) {
|
|
|
|
req_me = TRUE;
|
|
|
|
}
|
2015-06-11 00:56:23 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
if (req_block && (type & VM_ENV_FLAG_LOCAL) == 0) {
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_bug("vm_push_frame: specval (%p) should be a block_ptr on %x frame", (void *)specval, magic);
|
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
if (!req_block && (type & VM_ENV_FLAG_LOCAL) != 0) {
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_bug("vm_push_frame: specval (%p) should not be a block_ptr on %x frame", (void *)specval, magic);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (req_me) {
|
2015-06-11 00:56:23 +03:00
|
|
|
if (cref_or_me_type != imemo_ment) {
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_bug("vm_push_frame: (%s) should be method entry on %x frame", rb_obj_info(cref_or_me), magic);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2015-06-11 00:56:23 +03:00
|
|
|
if (req_cref && cref_or_me_type != imemo_cref) {
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_bug("vm_push_frame: (%s) should be CREF on %x frame", rb_obj_info(cref_or_me), magic);
|
|
|
|
}
|
|
|
|
else { /* cref or Qfalse */
|
2015-06-11 00:56:23 +03:00
|
|
|
if (cref_or_me != Qfalse && cref_or_me_type != imemo_cref) {
|
2017-06-03 13:07:44 +03:00
|
|
|
if (((type & VM_FRAME_FLAG_LAMBDA) || magic == VM_FRAME_MAGIC_IFUNC) && (cref_or_me_type == imemo_ment)) {
|
2015-06-02 07:20:30 +03:00
|
|
|
/* ignore */
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_bug("vm_push_frame: (%s) should be false or cref on %x frame", rb_obj_info(cref_or_me), magic);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
|
|
|
|
if (cref_or_me_type == imemo_ment) {
|
|
|
|
const rb_callable_method_entry_t *me = (const rb_callable_method_entry_t *)cref_or_me;
|
|
|
|
|
|
|
|
if (!callable_method_entry_p(me)) {
|
|
|
|
rb_bug("vm_push_frame: ment (%s) should be callable on %x frame.", rb_obj_info(cref_or_me), magic);
|
|
|
|
}
|
|
|
|
}
|
2016-08-03 03:16:34 +03:00
|
|
|
|
|
|
|
if ((type & VM_FRAME_MAGIC_MASK) == VM_FRAME_MAGIC_DUMMY) {
|
|
|
|
VM_ASSERT(iseq == NULL ||
|
2022-10-17 11:50:42 +03:00
|
|
|
RBASIC_CLASS((VALUE)iseq) == 0 || // dummy frame for loading
|
|
|
|
RUBY_VM_NORMAL_ISEQ_P(iseq) //argument error
|
|
|
|
);
|
2016-08-03 03:16:34 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
VM_ASSERT(is_cframe == !RUBY_VM_NORMAL_ISEQ_P(iseq));
|
|
|
|
}
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
|
|
|
|
2015-09-12 21:47:43 +03:00
|
|
|
static void
|
|
|
|
vm_check_frame(VALUE type,
|
|
|
|
VALUE specval,
|
2016-08-03 03:16:34 +03:00
|
|
|
VALUE cref_or_me,
|
|
|
|
const rb_iseq_t *iseq)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2016-08-03 03:16:34 +03:00
|
|
|
VALUE given_magic = type & VM_FRAME_MAGIC_MASK;
|
2016-07-28 14:02:30 +03:00
|
|
|
VM_ASSERT(FIXNUM_P(type));
|
2015-06-02 07:20:30 +03:00
|
|
|
|
2016-08-03 05:30:37 +03:00
|
|
|
#define CHECK(magic, req_block, req_me, req_cref, is_cframe) \
|
|
|
|
case magic: \
|
|
|
|
vm_check_frame_detail(type, req_block, req_me, req_cref, \
|
|
|
|
specval, cref_or_me, is_cframe, iseq); \
|
|
|
|
break
|
2016-08-03 03:16:34 +03:00
|
|
|
switch (given_magic) {
|
|
|
|
/* BLK ME CREF CFRAME */
|
|
|
|
CHECK(VM_FRAME_MAGIC_METHOD, TRUE, TRUE, FALSE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_CLASS, TRUE, FALSE, TRUE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_TOP, TRUE, FALSE, TRUE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_CFUNC, TRUE, TRUE, FALSE, TRUE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_BLOCK, FALSE, FALSE, FALSE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_IFUNC, FALSE, FALSE, FALSE, TRUE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_EVAL, FALSE, FALSE, FALSE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_RESCUE, FALSE, FALSE, FALSE, FALSE);
|
|
|
|
CHECK(VM_FRAME_MAGIC_DUMMY, TRUE, FALSE, FALSE, FALSE);
|
2015-06-02 07:20:30 +03:00
|
|
|
default:
|
2016-08-03 03:16:34 +03:00
|
|
|
rb_bug("vm_push_frame: unknown type (%x)", (unsigned int)given_magic);
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
2015-09-12 21:47:43 +03:00
|
|
|
#undef CHECK
|
|
|
|
}
|
2019-02-01 10:26:39 +03:00
|
|
|
|
|
|
|
static VALUE vm_stack_canary; /* Poison value that rb_vm_check_canary() compares against sp[0]. Initialized later */
|
|
|
|
static bool vm_stack_canary_was_born = false; /* rb_vm_check_canary() returns immediately while this is false */
|
|
|
|
|
2024-02-22 02:05:40 +03:00
|
|
|
// Return the index of the instruction right before the given PC.
|
|
|
|
// This is needed because insn_entry advances PC before the insn body.
|
|
|
|
static unsigned int
|
|
|
|
previous_insn_index(const rb_iseq_t *iseq, const VALUE *pc)
|
|
|
|
{
|
|
|
|
unsigned int pos = 0;
|
|
|
|
while (pos < ISEQ_BODY(iseq)->iseq_size) {
|
|
|
|
int opcode = rb_vm_insn_addr2opcode((void *)ISEQ_BODY(iseq)->iseq_encoded[pos]);
|
|
|
|
unsigned int next_pos = pos + insn_len(opcode);
|
|
|
|
if (ISEQ_BODY(iseq)->iseq_encoded + next_pos == pc) {
|
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
pos = next_pos;
|
|
|
|
}
|
|
|
|
rb_bug("failed to find the previous insn");
|
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
void
|
2020-12-25 17:36:25 +03:00
|
|
|
rb_vm_check_canary(const rb_execution_context_t *ec, VALUE *sp)
|
2019-02-01 10:26:39 +03:00
|
|
|
{
|
|
|
|
const struct rb_control_frame_struct *reg_cfp = ec->cfp;
|
|
|
|
const struct rb_iseq_struct *iseq;
|
|
|
|
|
|
|
|
if (! LIKELY(vm_stack_canary_was_born)) {
|
|
|
|
return; /* :FIXME: isn't it rather fatal to enter this branch? */
|
|
|
|
}
|
2019-04-25 09:03:18 +03:00
|
|
|
else if ((VALUE *)reg_cfp == ec->vm_stack + ec->vm_stack_size) {
|
|
|
|
/* This is at the very beginning of a thread. cfp does not exist. */
|
|
|
|
return;
|
|
|
|
}
|
2019-02-01 10:26:39 +03:00
|
|
|
else if (! (iseq = GET_ISEQ())) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else if (LIKELY(sp[0] != vm_stack_canary)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
2019-12-20 03:19:39 +03:00
|
|
|
/* we are going to call methods below; squash the canary to
|
2019-02-01 10:26:39 +03:00
|
|
|
* prevent infinite loop. */
|
|
|
|
sp[0] = Qundef;
|
|
|
|
}
|
|
|
|
|
|
|
|
const VALUE *orig = rb_iseq_original_iseq(iseq);
|
|
|
|
const VALUE iseqw = rb_iseqw_new(iseq);
|
|
|
|
const VALUE inspection = rb_inspect(iseqw);
|
|
|
|
const char *stri = rb_str_to_cstr(inspection);
|
|
|
|
const VALUE disasm = rb_iseq_disasm(iseq);
|
2019-04-25 08:33:44 +03:00
|
|
|
const char *strd = rb_str_to_cstr(disasm);
|
2024-02-29 21:30:38 +03:00
|
|
|
const ptrdiff_t pos = previous_insn_index(iseq, GET_PC());
|
|
|
|
const enum ruby_vminsn_type insn = (enum ruby_vminsn_type)orig[pos];
|
|
|
|
const char *name = insn_name(insn);
|
2019-02-01 10:26:39 +03:00
|
|
|
|
|
|
|
/* rb_bug() is not capable of outputting this large contents. It
|
|
|
|
is designed to run form a SIGSEGV handler, which tends to be
|
|
|
|
very restricted. */
|
2021-09-09 17:21:06 +03:00
|
|
|
ruby_debug_printf(
|
2019-02-01 10:26:39 +03:00
|
|
|
"We are killing the stack canary set by %s, "
|
|
|
|
"at %s@pc=%"PRIdPTR"\n"
|
|
|
|
"watch out the C stack trace.\n"
|
|
|
|
"%s",
|
|
|
|
name, stri, pos, strd);
|
|
|
|
rb_bug("see above.");
|
|
|
|
}
|
2020-12-25 17:36:25 +03:00
|
|
|
#define vm_check_canary(ec, sp) rb_vm_check_canary(ec, sp)
|
2020-02-03 10:57:41 +03:00
|
|
|
|
2015-09-12 21:47:43 +03:00
|
|
|
#else
|
2019-02-01 10:26:39 +03:00
|
|
|
#define vm_check_canary(ec, sp)
|
2016-08-03 03:16:34 +03:00
|
|
|
#define vm_check_frame(a, b, c, d)
|
2015-09-12 21:47:43 +03:00
|
|
|
#endif /* VM_CHECK_MODE > 0 */
|
|
|
|
|
2020-07-06 06:53:37 +03:00
|
|
|
#if USE_DEBUG_COUNTER
/*
 * Bump the debug counters describing a frame push.
 *
 * Always counts `frame_push`.  Unless the previous control frame is the end
 * control frame of `ec`, also counts the transition between the previous and
 * the pushed frame (R = VM_FRAME_RUBYFRAME_P is true, C = false).  Finally
 * counts one per-kind counter chosen from the magic bits of `type`; an
 * unrecognised magic value ends in rb_bug().
 */
static void
vm_push_frame_debug_counter_inc(
    const struct rb_execution_context_struct *ec,
    const struct rb_control_frame_struct *reg_cfp,
    VALUE type)
{
    const struct rb_control_frame_struct *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(reg_cfp);

    RB_DEBUG_COUNTER_INC(frame_push);

    if (RUBY_VM_END_CONTROL_FRAME(ec) != prev_cfp) {
        const bool to_ruby = VM_FRAME_RUBYFRAME_P(reg_cfp);
        const bool from_ruby = VM_FRAME_RUBYFRAME_P(prev_cfp);

        if (from_ruby && to_ruby) {
            RB_DEBUG_COUNTER_INC(frame_R2R);
        }
        else if (from_ruby) {
            RB_DEBUG_COUNTER_INC(frame_R2C);
        }
        else if (to_ruby) {
            RB_DEBUG_COUNTER_INC(frame_C2R);
        }
        else {
            RB_DEBUG_COUNTER_INC(frame_C2C);
        }
    }

    switch (type & VM_FRAME_MAGIC_MASK) {
      case VM_FRAME_MAGIC_METHOD:
        RB_DEBUG_COUNTER_INC(frame_push_method);
        break;
      case VM_FRAME_MAGIC_BLOCK:
        RB_DEBUG_COUNTER_INC(frame_push_block);
        break;
      case VM_FRAME_MAGIC_CLASS:
        RB_DEBUG_COUNTER_INC(frame_push_class);
        break;
      case VM_FRAME_MAGIC_TOP:
        RB_DEBUG_COUNTER_INC(frame_push_top);
        break;
      case VM_FRAME_MAGIC_CFUNC:
        RB_DEBUG_COUNTER_INC(frame_push_cfunc);
        break;
      case VM_FRAME_MAGIC_IFUNC:
        RB_DEBUG_COUNTER_INC(frame_push_ifunc);
        break;
      case VM_FRAME_MAGIC_EVAL:
        RB_DEBUG_COUNTER_INC(frame_push_eval);
        break;
      case VM_FRAME_MAGIC_RESCUE:
        RB_DEBUG_COUNTER_INC(frame_push_rescue);
        break;
      case VM_FRAME_MAGIC_DUMMY:
        RB_DEBUG_COUNTER_INC(frame_push_dummy);
        break;
      default:
        rb_bug("unreachable");
    }
}
#else
/* Debug counters are disabled: the call compiles to nothing. */
#define vm_push_frame_debug_counter_inc(ec, cfp, t) /* void */
#endif
|
|
|
|
|
2024-02-21 00:42:29 +03:00
|
|
|
// Return a poison value to be set above the stack top to verify leafness.
|
|
|
|
VALUE
|
|
|
|
rb_vm_stack_canary(void)
|
|
|
|
{
|
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
return vm_stack_canary;
|
|
|
|
#else
|
|
|
|
return 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2020-07-06 08:58:57 +03:00
|
|
|
/* vm_push_frame() stores cref_or_me, specval and the flags word in this
 * order and then points ep at the flags word, so the data indexes relative
 * to ep must be exactly -2, -1 and 0. */
STATIC_ASSERT(VM_ENV_DATA_INDEX_ME_CREF, VM_ENV_DATA_INDEX_ME_CREF == -2);
STATIC_ASSERT(VM_ENV_DATA_INDEX_SPECVAL, VM_ENV_DATA_INDEX_SPECVAL == -1);
STATIC_ASSERT(VM_ENV_DATA_INDEX_FLAGS,   VM_ENV_DATA_INDEX_FLAGS   == -0);
|
|
|
|
|
2020-07-06 05:51:53 +03:00
|
|
|
static void
|
2017-10-26 13:53:42 +03:00
|
|
|
vm_push_frame(rb_execution_context_t *ec,
|
|
|
|
const rb_iseq_t *iseq,
|
|
|
|
VALUE type,
|
|
|
|
VALUE self,
|
|
|
|
VALUE specval,
|
|
|
|
VALUE cref_or_me,
|
|
|
|
const VALUE *pc,
|
|
|
|
VALUE *sp,
|
|
|
|
int local_size,
|
|
|
|
int stack_max)
|
2015-09-12 21:47:43 +03:00
|
|
|
{
|
2019-09-20 15:06:07 +03:00
|
|
|
rb_control_frame_t *const cfp = RUBY_VM_NEXT_CONTROL_FRAME(ec->cfp);
|
2015-09-12 21:47:43 +03:00
|
|
|
|
2016-08-03 03:16:34 +03:00
|
|
|
vm_check_frame(type, specval, cref_or_me, iseq);
|
2016-07-28 14:02:30 +03:00
|
|
|
VM_ASSERT(local_size >= 0);
|
2015-06-02 07:20:30 +03:00
|
|
|
|
2012-06-11 07:14:59 +04:00
|
|
|
/* check stack overflow */
|
2014-07-16 15:46:06 +04:00
|
|
|
CHECK_VM_STACK_OVERFLOW0(cfp, sp, local_size + stack_max);
|
2019-02-01 10:26:39 +03:00
|
|
|
vm_check_canary(ec, sp);
|
2013-08-06 12:33:05 +04:00
|
|
|
|
* vm.c, insns.def, eval.c, vm_insnhelper.c: fix CREF handling.
VM value stack frame of block contains cref information.
(dfp[-1] points CREF)
* compile.c, eval_intern.h, eval_method.c, load.c, proc.c,
vm_dump.h, vm_core.h: ditto.
* include/ruby/ruby.h, gc.c: remove T_VALUES because of above
changes.
* bootstraptest/test_eval.rb, test_knownbug.rb: move solved test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16468 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-05-19 07:08:50 +04:00
|
|
|
/* setup vm value stack */
|
2009-02-22 17:23:33 +03:00
|
|
|
|
2012-06-11 07:14:59 +04:00
|
|
|
/* initialize local variables */
|
2019-09-20 15:06:07 +03:00
|
|
|
for (int i=0; i < local_size; i++) {
|
2012-06-11 07:14:59 +04:00
|
|
|
*sp++ = Qnil;
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
/* setup ep with managing data */
|
|
|
|
*sp++ = cref_or_me; /* ep[-2] / Qnil or T_IMEMO(cref) or T_IMEMO(ment) */
|
|
|
|
*sp++ = specval /* ep[-1] / block handler or prev env ptr */;
|
2020-07-06 09:10:10 +03:00
|
|
|
*sp++ = type; /* ep[-0] / ENV_FLAGS */
|
2015-10-11 01:15:18 +03:00
|
|
|
|
2020-07-06 09:10:10 +03:00
|
|
|
/* setup new frame */
|
|
|
|
*cfp = (const struct rb_control_frame_struct) {
|
|
|
|
.pc = pc,
|
|
|
|
.sp = sp,
|
|
|
|
.iseq = iseq,
|
|
|
|
.self = self,
|
|
|
|
.ep = sp - 1,
|
|
|
|
.block_code = NULL,
|
2012-09-28 08:05:36 +04:00
|
|
|
#if VM_DEBUG_BP_CHECK
|
2020-07-06 09:10:10 +03:00
|
|
|
.bp_check = sp,
|
2012-09-28 08:05:36 +04:00
|
|
|
#endif
|
2021-02-10 00:24:06 +03:00
|
|
|
.jit_return = NULL
|
2020-07-06 09:10:10 +03:00
|
|
|
};
|
|
|
|
|
2024-06-21 13:48:37 +03:00
|
|
|
/* Ensure the initialization of `*cfp` above never gets reordered with the update of `ec->cfp` below.
|
|
|
|
This is a no-op in all cases we've looked at (https://godbolt.org/z/3oxd1446K), but should guarantee it for all
|
|
|
|
future/untested compilers/platforms. */
|
|
|
|
|
2024-07-14 04:36:35 +03:00
|
|
|
#if defined HAVE_DECL_ATOMIC_SIGNAL_FENCE && HAVE_DECL_ATOMIC_SIGNAL_FENCE
|
2024-06-21 13:48:37 +03:00
|
|
|
atomic_signal_fence(memory_order_seq_cst);
|
|
|
|
#endif
|
|
|
|
|
2020-07-06 09:10:10 +03:00
|
|
|
ec->cfp = cfp;
|
2015-06-02 07:20:30 +03:00
|
|
|
|
2007-08-12 23:12:55 +04:00
|
|
|
if (VMDEBUG == 2) {
|
|
|
|
SDR();
|
|
|
|
}
|
2020-07-06 06:53:37 +03:00
|
|
|
vm_push_frame_debug_counter_inc(ec, cfp, type);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2023-01-16 23:42:51 +03:00
|
|
|
void
|
|
|
|
rb_vm_pop_frame_no_int(rb_execution_context_t *ec)
|
|
|
|
{
|
|
|
|
rb_control_frame_t *cfp = ec->cfp;
|
|
|
|
|
|
|
|
if (VMDEBUG == 2) SDR();
|
|
|
|
|
|
|
|
ec->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
/* return TRUE if the frame is finished */
|
2016-07-26 13:28:21 +03:00
|
|
|
static inline int
|
2017-10-26 13:55:24 +03:00
|
|
|
vm_pop_frame(rb_execution_context_t *ec, rb_control_frame_t *cfp, const VALUE *ep)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
|
|
|
|
|
2016-07-26 13:28:21 +03:00
|
|
|
if (VMDEBUG == 2) SDR();
|
2007-08-12 23:12:55 +04:00
|
|
|
|
2019-11-29 11:39:06 +03:00
|
|
|
RUBY_VM_CHECK_INTS(ec);
|
2017-10-26 13:55:24 +03:00
|
|
|
ec->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
2016-07-26 13:28:21 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
return flags & VM_FRAME_FLAG_FINISH;
|
2016-07-26 13:28:21 +03:00
|
|
|
}
|
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
void
|
2017-10-26 13:55:24 +03:00
|
|
|
rb_vm_pop_frame(rb_execution_context_t *ec)
|
2016-07-26 13:28:21 +03:00
|
|
|
{
|
2017-10-26 13:55:24 +03:00
|
|
|
vm_pop_frame(ec, ec->cfp, ec->cfp->ep);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2022-10-17 11:50:42 +03:00
|
|
|
// it pushes pseudo-frame with fname filename.
|
|
|
|
VALUE
|
|
|
|
rb_vm_push_frame_fname(rb_execution_context_t *ec, VALUE fname)
|
|
|
|
{
|
|
|
|
VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer();
|
|
|
|
void *ptr = ruby_xcalloc(sizeof(struct rb_iseq_constant_body) + sizeof(struct rb_iseq_struct), 1);
|
|
|
|
rb_imemo_tmpbuf_set_ptr(tmpbuf, ptr);
|
|
|
|
|
|
|
|
struct rb_iseq_struct *dmy_iseq = (struct rb_iseq_struct *)ptr;
|
|
|
|
struct rb_iseq_constant_body *dmy_body = (struct rb_iseq_constant_body *)&dmy_iseq[1];
|
|
|
|
dmy_iseq->body = dmy_body;
|
|
|
|
dmy_body->type = ISEQ_TYPE_TOP;
|
|
|
|
dmy_body->location.pathobj = fname;
|
|
|
|
|
|
|
|
vm_push_frame(ec,
|
|
|
|
dmy_iseq, //const rb_iseq_t *iseq,
|
|
|
|
VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH, // VALUE type,
|
|
|
|
ec->cfp->self, // VALUE self,
|
|
|
|
VM_BLOCK_HANDLER_NONE, // VALUE specval,
|
|
|
|
Qfalse, // VALUE cref_or_me,
|
|
|
|
NULL, // const VALUE *pc,
|
|
|
|
ec->cfp->sp, // VALUE *sp,
|
|
|
|
0, // int local_size,
|
|
|
|
0); // int stack_max
|
|
|
|
|
|
|
|
return tmpbuf;
|
|
|
|
}
|
|
|
|
|
2007-06-24 21:19:22 +04:00
|
|
|
/* method dispatch */
|
2012-03-15 01:10:16 +04:00
|
|
|
static inline VALUE
|
2014-11-27 13:15:47 +03:00
|
|
|
rb_arity_error_new(int argc, int min, int max)
|
2012-03-15 05:39:00 +04:00
|
|
|
{
|
2022-10-01 10:18:03 +03:00
|
|
|
VALUE err_mess = rb_sprintf("wrong number of arguments (given %d, expected %d", argc, min);
|
2012-03-15 01:10:16 +04:00
|
|
|
if (min == max) {
|
2022-10-01 10:18:03 +03:00
|
|
|
/* max is not needed */
|
2012-03-15 01:10:16 +04:00
|
|
|
}
|
|
|
|
else if (max == UNLIMITED_ARGUMENTS) {
|
2022-10-01 10:18:03 +03:00
|
|
|
rb_str_cat_cstr(err_mess, "+");
|
2012-03-15 01:10:16 +04:00
|
|
|
}
|
|
|
|
else {
|
2022-10-01 10:18:03 +03:00
|
|
|
rb_str_catf(err_mess, "..%d", max);
|
2012-03-15 01:10:16 +04:00
|
|
|
}
|
2022-10-01 10:18:03 +03:00
|
|
|
rb_str_cat_cstr(err_mess, ")");
|
2012-03-15 01:10:16 +04:00
|
|
|
return rb_exc_new3(rb_eArgError, err_mess);
|
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
void
|
2012-03-15 05:39:00 +04:00
|
|
|
rb_error_arity(int argc, int min, int max)
|
|
|
|
{
|
2014-11-27 13:15:47 +03:00
|
|
|
rb_exc_raise(rb_arity_error_new(argc, min, max));
|
2012-03-15 01:10:34 +04:00
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
/* lvar */
|
|
|
|
|
|
|
|
NOINLINE(static void vm_env_write_slowpath(const VALUE *ep, int index, VALUE v));

/*
 * Slow path of vm_env_write(), taken when the env flags carry
 * VM_ENV_FLAG_WB_REQUIRED.  Registers the env object with the GC as
 * remembered, stores `v` at ep[index], then clears the flag.
 */
static void
vm_env_write_slowpath(const VALUE *ep, int index, VALUE v)
{
    /* forcibly remember the env value */
    const VALUE envval = VM_ENV_ENVVAL(ep);

    rb_gc_writebarrier_remember(envval);
    VM_FORCE_WRITE(ep + index, v);
    VM_ENV_FLAGS_UNSET(ep, VM_ENV_FLAG_WB_REQUIRED);
    RB_DEBUG_COUNTER_INC(lvar_set_slowpath);
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2024-03-01 04:53:48 +03:00
|
|
|
// YJIT assumes this function never runs GC
|
2016-07-28 14:02:30 +03:00
|
|
|
static inline void
|
|
|
|
vm_env_write(const VALUE *ep, int index, VALUE v)
|
|
|
|
{
|
|
|
|
VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
|
|
|
|
if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
|
|
|
|
VM_STACK_ENV_WRITE(ep, index, v);
|
2008-06-06 18:48:07 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
else {
|
2016-07-28 14:02:30 +03:00
|
|
|
vm_env_write_slowpath(ep, index, v);
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
}
|
|
|
|
|
2023-08-17 17:11:17 +03:00
|
|
|
void
|
|
|
|
rb_vm_env_write(const VALUE *ep, int index, VALUE v)
|
|
|
|
{
|
|
|
|
vm_env_write(ep, index, v);
|
|
|
|
}
|
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
VALUE
|
2017-10-29 18:25:32 +03:00
|
|
|
rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler)
|
2017-10-24 14:13:49 +03:00
|
|
|
{
|
|
|
|
if (block_handler == VM_BLOCK_HANDLER_NONE) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
switch (vm_block_handler_type(block_handler)) {
|
|
|
|
case block_handler_type_iseq:
|
|
|
|
case block_handler_type_ifunc:
|
2017-10-27 02:33:59 +03:00
|
|
|
return rb_vm_make_proc(ec, VM_BH_TO_CAPT_BLOCK(block_handler), rb_cProc);
|
2017-10-24 14:13:49 +03:00
|
|
|
case block_handler_type_symbol:
|
|
|
|
return rb_sym_to_proc(VM_BH_TO_SYMBOL(block_handler));
|
|
|
|
case block_handler_type_proc:
|
|
|
|
return VM_BH_TO_PROC(block_handler);
|
|
|
|
default:
|
|
|
|
VM_UNREACHABLE(rb_vm_bh_to_procval);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
|
|
|
|
/* svar */
|
2015-06-02 07:20:30 +03:00
|
|
|
|
|
|
|
#if VM_CHECK_MODE > 0
/*
 * Check-mode helper: return TRUE if `svar` is a T_IMEMO of kind svar, cref
 * or ment.  Anything else is reported through rb_bug().
 */
static int
vm_svar_valid_p(VALUE svar)
{
    if (RB_TYPE_P((VALUE)svar, T_IMEMO)) {
        const enum imemo_type kind = imemo_type(svar);

        if (kind == imemo_svar || kind == imemo_cref || kind == imemo_ment) {
            return TRUE;
        }
    }

    rb_bug("vm_svar_valid_p: unknown type: %s", rb_obj_info(svar));
    return FALSE;
}
#endif
|
2015-02-24 13:11:14 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
static inline struct vm_svar *
|
2017-10-27 04:31:15 +03:00
|
|
|
lep_svar(const rb_execution_context_t *ec, const VALUE *lep)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
|
|
|
VALUE svar;
|
|
|
|
|
2017-10-27 04:22:01 +03:00
|
|
|
if (lep && (ec == NULL || ec->root_lep != lep)) {
|
2016-07-28 14:02:30 +03:00
|
|
|
svar = lep[VM_ENV_DATA_INDEX_ME_CREF];
|
|
|
|
}
|
|
|
|
else {
|
2017-10-27 04:22:01 +03:00
|
|
|
svar = ec->root_svar;
|
2016-07-28 14:02:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
VM_ASSERT(svar == Qfalse || vm_svar_valid_p(svar));
|
|
|
|
|
|
|
|
return (struct vm_svar *)svar;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
2017-10-27 04:31:15 +03:00
|
|
|
lep_svar_write(const rb_execution_context_t *ec, const VALUE *lep, const struct vm_svar *svar)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
|
|
|
VM_ASSERT(vm_svar_valid_p((VALUE)svar));
|
|
|
|
|
2017-10-27 04:22:01 +03:00
|
|
|
if (lep && (ec == NULL || ec->root_lep != lep)) {
|
2016-07-28 14:02:30 +03:00
|
|
|
vm_env_write(lep, VM_ENV_DATA_INDEX_ME_CREF, (VALUE)svar);
|
|
|
|
}
|
|
|
|
else {
|
2017-10-27 04:22:01 +03:00
|
|
|
RB_OBJ_WRITE(rb_ec_thread_ptr(ec)->self, &ec->root_svar, svar);
|
2016-07-28 14:02:30 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2008-06-06 18:48:07 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2017-10-27 04:31:15 +03:00
|
|
|
lep_svar_get(const rb_execution_context_t *ec, const VALUE *lep, rb_num_t key)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2017-10-27 04:22:01 +03:00
|
|
|
const struct vm_svar *svar = lep_svar(ec, lep);
|
2015-02-24 13:11:14 +03:00
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
if ((VALUE)svar == Qfalse || imemo_type((VALUE)svar) != imemo_svar) return Qnil;
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
switch (key) {
|
2015-02-27 11:10:04 +03:00
|
|
|
case VM_SVAR_LASTLINE:
|
2015-03-09 00:53:05 +03:00
|
|
|
return svar->lastline;
|
2015-02-27 11:10:04 +03:00
|
|
|
case VM_SVAR_BACKREF:
|
2015-03-09 00:53:05 +03:00
|
|
|
return svar->backref;
|
2012-10-14 23:58:59 +04:00
|
|
|
default: {
|
2015-03-09 00:53:05 +03:00
|
|
|
const VALUE ary = svar->others;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-12-10 10:11:16 +04:00
|
|
|
if (NIL_P(ary)) {
|
2012-10-14 23:58:59 +04:00
|
|
|
return Qnil;
|
2011-12-26 18:20:09 +04:00
|
|
|
}
|
|
|
|
else {
|
2015-02-27 11:10:04 +03:00
|
|
|
return rb_ary_entry(ary, key - VM_SVAR_EXTRA_START);
|
2011-12-26 18:20:09 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2011-12-26 18:20:09 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2011-12-26 18:20:09 +04:00
|
|
|
|
2015-03-11 15:27:34 +03:00
|
|
|
static struct vm_svar *
|
2015-06-02 07:20:30 +03:00
|
|
|
svar_new(VALUE obj)
|
2015-03-11 15:27:34 +03:00
|
|
|
{
|
2024-02-20 23:58:10 +03:00
|
|
|
struct vm_svar *svar = IMEMO_NEW(struct vm_svar, imemo_svar, obj);
|
|
|
|
*((VALUE *)&svar->lastline) = Qnil;
|
|
|
|
*((VALUE *)&svar->backref) = Qnil;
|
|
|
|
*((VALUE *)&svar->others) = Qnil;
|
|
|
|
|
|
|
|
return svar;
|
2015-03-11 15:27:34 +03:00
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static void
|
2017-10-27 04:31:15 +03:00
|
|
|
lep_svar_set(const rb_execution_context_t *ec, const VALUE *lep, rb_num_t key, VALUE val)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2017-10-27 04:22:01 +03:00
|
|
|
struct vm_svar *svar = lep_svar(ec, lep);
|
2015-02-24 13:11:14 +03:00
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
if ((VALUE)svar == Qfalse || imemo_type((VALUE)svar) != imemo_svar) {
|
2017-10-27 04:22:01 +03:00
|
|
|
lep_svar_write(ec, lep, svar = svar_new((VALUE)svar));
|
2015-02-24 13:11:14 +03:00
|
|
|
}
|
2008-05-21 19:18:15 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
switch (key) {
|
2015-02-27 11:10:04 +03:00
|
|
|
case VM_SVAR_LASTLINE:
|
2015-03-09 00:53:05 +03:00
|
|
|
RB_OBJ_WRITE(svar, &svar->lastline, val);
|
2012-10-14 23:58:59 +04:00
|
|
|
return;
|
2015-02-27 11:10:04 +03:00
|
|
|
case VM_SVAR_BACKREF:
|
2015-03-09 00:53:05 +03:00
|
|
|
RB_OBJ_WRITE(svar, &svar->backref, val);
|
2012-10-14 23:58:59 +04:00
|
|
|
return;
|
|
|
|
default: {
|
2015-03-09 00:53:05 +03:00
|
|
|
VALUE ary = svar->others;
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-12-10 10:11:16 +04:00
|
|
|
if (NIL_P(ary)) {
|
2015-03-09 00:53:05 +03:00
|
|
|
RB_OBJ_WRITE(svar, &svar->others, ary = rb_ary_new());
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2015-02-27 11:10:04 +03:00
|
|
|
rb_ary_store(ary, key - VM_SVAR_EXTRA_START, val);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
2017-10-27 04:31:15 +03:00
|
|
|
vm_getspecial(const rb_execution_context_t *ec, const VALUE *lep, rb_num_t key, rb_num_t type)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
|
|
|
VALUE val;
|
|
|
|
|
|
|
|
if (type == 0) {
|
2017-10-27 04:22:01 +03:00
|
|
|
val = lep_svar_get(ec, lep, key);
|
2008-06-06 18:48:07 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
else {
|
2017-10-27 04:22:01 +03:00
|
|
|
VALUE backref = lep_svar_get(ec, lep, VM_SVAR_BACKREF);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (type & 0x01) {
|
|
|
|
switch (type >> 1) {
|
|
|
|
case '&':
|
|
|
|
val = rb_reg_last_match(backref);
|
|
|
|
break;
|
|
|
|
case '`':
|
|
|
|
val = rb_reg_match_pre(backref);
|
|
|
|
break;
|
|
|
|
case '\'':
|
|
|
|
val = rb_reg_match_post(backref);
|
|
|
|
break;
|
|
|
|
case '+':
|
|
|
|
val = rb_reg_match_last(backref);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
rb_bug("unexpected back-ref");
|
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2008-06-06 18:48:07 +04:00
|
|
|
else {
|
2012-10-14 23:58:59 +04:00
|
|
|
val = rb_reg_nth_match((int)(type >> 1), backref);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2008-06-06 18:48:07 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
return val;
|
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2023-06-27 17:14:10 +03:00
|
|
|
static inline VALUE
|
|
|
|
vm_backref_defined(const rb_execution_context_t *ec, const VALUE *lep, rb_num_t type)
|
|
|
|
{
|
|
|
|
VALUE backref = lep_svar_get(ec, lep, VM_SVAR_BACKREF);
|
|
|
|
int nth = 0;
|
|
|
|
|
|
|
|
if (type & 0x01) {
|
|
|
|
switch (type >> 1) {
|
|
|
|
case '&':
|
|
|
|
case '`':
|
|
|
|
case '\'':
|
|
|
|
break;
|
|
|
|
case '+':
|
|
|
|
return rb_reg_last_defined(backref);
|
|
|
|
default:
|
|
|
|
rb_bug("unexpected back-ref");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
nth = (int)(type >> 1);
|
|
|
|
}
|
|
|
|
return rb_reg_nth_defined(nth, backref);
|
|
|
|
}
|
|
|
|
|
2019-10-03 06:26:41 +03:00
|
|
|
PUREFUNC(static rb_callable_method_entry_t *check_method_entry(VALUE obj, int can_be_svar));

/*
 * Extract a callable method entry from an env ME/CREF slot value.
 *
 * Returns NULL for Qfalse and for a cref.  A ment is returned as is.  An
 * svar is unwrapped through its cref_or_me field only when `can_be_svar` is
 * true.  Every other case is a bug in check mode and NULL otherwise.
 */
static rb_callable_method_entry_t *
check_method_entry(VALUE obj, int can_be_svar)
{
    if (obj == Qfalse) {
        return NULL;
    }

#if VM_CHECK_MODE > 0
    if (!RB_TYPE_P(obj, T_IMEMO)) {
        rb_bug("check_method_entry: unknown type: %s", rb_obj_info(obj));
    }
#endif

    const enum imemo_type kind = imemo_type(obj);

    if (kind == imemo_ment) {
        return (rb_callable_method_entry_t *)obj;
    }
    if (kind == imemo_cref) {
        return NULL;
    }
    if (kind == imemo_svar && can_be_svar) {
        return check_method_entry(((struct vm_svar *)obj)->cref_or_me, FALSE);
    }

    /* an svar where none is allowed, or any other imemo kind */
#if VM_CHECK_MODE > 0
    rb_bug("check_method_entry: svar should not be there:");
#endif
    return NULL;
}
|
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
const rb_callable_method_entry_t *
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_vm_frame_method_entry(const rb_control_frame_t *cfp)
|
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
const VALUE *ep = cfp->ep;
|
2019-10-03 06:26:41 +03:00
|
|
|
rb_callable_method_entry_t *me;
|
2015-06-02 07:20:30 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
while (!VM_ENV_LOCAL_P(ep)) {
|
|
|
|
if ((me = check_method_entry(ep[VM_ENV_DATA_INDEX_ME_CREF], FALSE)) != NULL) return me;
|
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
}
|
2015-06-02 07:20:30 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
return check_method_entry(ep[VM_ENV_DATA_INDEX_ME_CREF], TRUE);
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
|
|
|
|
`Primitive.mandatory_only?` for fast path
Compared with C methods, a built-in method written in Ruby is
slower if only mandatory parameters are given because it needs to
check the arguments and fill default values for optional and keyword
parameters (C methods can check the number of parameters with `argc`,
so there are no overhead). Passing mandatory arguments are common
(optional arguments are exceptional, in many cases) so it is important
to provide the fast path for such common cases.
`Primitive.mandatory_only?` is a special builtin function used with
`if` expression like that:
```ruby
def self.at(time, subsec = false, unit = :microsecond, in: nil)
if Primitive.mandatory_only?
Primitive.time_s_at1(time)
else
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
end
```
and it makes two ISeq,
```
def self.at(time, subsec = false, unit = :microsecond, in: nil)
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
def self.at(time)
Primitive.time_s_at1(time)
end
```
and (2) is pointed by (1). Note that `Primitive.mandatory_only?`
should be used only in a condition of an `if` statement and the
`if` statement should be equal to the method body (you cannot
put any expression before and after the `if` statement).
A method entry with `mandatory_only?` (`Time.at` on the above case)
is marked as `iseq_overload`. When the method is dispatched only
with mandatory arguments (`Time.at(0)` for example), make another
method entry with ISeq (2) as mandatory only method entry and it
will be cached in an inline method cache.
The idea is similar to the one discussed in https://bugs.ruby-lang.org/issues/16254
but it only checks mandatory parameters or more, because many cases
only mandatory parameters are given. If we find other cases (optional
or keyword parameters are used frequently and it hurts performance),
we can extend the feature.
2021-11-12 20:12:20 +03:00
|
|
|
static const rb_iseq_t *
|
2020-08-11 20:22:43 +03:00
|
|
|
method_entry_iseqptr(const rb_callable_method_entry_t *me)
|
|
|
|
{
|
|
|
|
switch (me->def->type) {
|
|
|
|
case VM_METHOD_TYPE_ISEQ:
|
|
|
|
return me->def->body.iseq.iseqptr;
|
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
static rb_cref_t *
|
2020-08-11 20:22:43 +03:00
|
|
|
method_entry_cref(const rb_callable_method_entry_t *me)
|
2015-06-02 07:20:30 +03:00
|
|
|
{
|
|
|
|
switch (me->def->type) {
|
|
|
|
case VM_METHOD_TYPE_ISEQ:
|
|
|
|
return me->def->body.iseq.cref;
|
|
|
|
default:
|
|
|
|
return NULL;
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-08 20:44:51 +03:00
|
|
|
#if VM_CHECK_MODE == 0
|
|
|
|
PUREFUNC(static rb_cref_t *check_cref(VALUE, int));
|
|
|
|
#endif
|
2015-03-09 00:22:43 +03:00
|
|
|
static rb_cref_t *
|
2015-06-02 07:20:30 +03:00
|
|
|
check_cref(VALUE obj, int can_be_svar)
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
{
|
2015-06-02 07:20:30 +03:00
|
|
|
if (obj == Qfalse) return NULL;
|
|
|
|
|
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
if (!RB_TYPE_P(obj, T_IMEMO)) rb_bug("check_cref: unknown type: %s", rb_obj_info(obj));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
switch (imemo_type(obj)) {
|
|
|
|
case imemo_ment:
|
2019-10-03 06:26:41 +03:00
|
|
|
return method_entry_cref((rb_callable_method_entry_t *)obj);
|
2015-06-02 07:20:30 +03:00
|
|
|
case imemo_cref:
|
|
|
|
return (rb_cref_t *)obj;
|
|
|
|
case imemo_svar:
|
|
|
|
if (can_be_svar) {
|
|
|
|
return check_cref(((struct vm_svar *)obj)->cref_or_me, FALSE);
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
rb_bug("check_method_entry: svar should not be there:");
|
|
|
|
#endif
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-30 00:47:05 +03:00
|
|
|
static inline rb_cref_t *
|
2015-06-02 07:20:30 +03:00
|
|
|
vm_env_cref(const VALUE *ep)
|
|
|
|
{
|
|
|
|
rb_cref_t *cref;
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
while (!VM_ENV_LOCAL_P(ep)) {
|
|
|
|
if ((cref = check_cref(ep[VM_ENV_DATA_INDEX_ME_CREF], FALSE)) != NULL) return cref;
|
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
return check_cref(ep[VM_ENV_DATA_INDEX_ME_CREF], TRUE);
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
is_cref(const VALUE v, int can_be_svar)
|
|
|
|
{
|
|
|
|
if (RB_TYPE_P(v, T_IMEMO)) {
|
|
|
|
switch (imemo_type(v)) {
|
|
|
|
case imemo_cref:
|
|
|
|
return TRUE;
|
|
|
|
case imemo_svar:
|
|
|
|
if (can_be_svar) return is_cref(((struct vm_svar *)v)->cref_or_me, FALSE);
|
|
|
|
default:
|
|
|
|
break;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2015-06-02 07:20:30 +03:00
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vm_env_cref_by_cref(const VALUE *ep)
|
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
while (!VM_ENV_LOCAL_P(ep)) {
|
|
|
|
if (is_cref(ep[VM_ENV_DATA_INDEX_ME_CREF], FALSE)) return TRUE;
|
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
2008-06-06 18:48:07 +04:00
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
return is_cref(ep[VM_ENV_DATA_INDEX_ME_CREF], TRUE);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2015-11-20 03:17:25 +03:00
|
|
|
static rb_cref_t *
|
2016-07-28 14:02:30 +03:00
|
|
|
cref_replace_with_duplicated_cref_each_frame(const VALUE *vptr, int can_be_svar, VALUE parent)
|
2015-11-20 03:17:25 +03:00
|
|
|
{
|
|
|
|
const VALUE v = *vptr;
|
|
|
|
rb_cref_t *cref, *new_cref;
|
|
|
|
|
|
|
|
if (RB_TYPE_P(v, T_IMEMO)) {
|
|
|
|
switch (imemo_type(v)) {
|
|
|
|
case imemo_cref:
|
|
|
|
cref = (rb_cref_t *)v;
|
|
|
|
new_cref = vm_cref_dup(cref);
|
|
|
|
if (parent) {
|
|
|
|
RB_OBJ_WRITE(parent, vptr, new_cref);
|
|
|
|
}
|
|
|
|
else {
|
2016-07-28 14:02:30 +03:00
|
|
|
VM_FORCE_WRITE(vptr, (VALUE)new_cref);
|
2015-11-20 03:17:25 +03:00
|
|
|
}
|
|
|
|
return (rb_cref_t *)new_cref;
|
|
|
|
case imemo_svar:
|
|
|
|
if (can_be_svar) {
|
2021-10-19 11:09:32 +03:00
|
|
|
return cref_replace_with_duplicated_cref_each_frame(&((struct vm_svar *)v)->cref_or_me, FALSE, v);
|
2015-11-20 03:17:25 +03:00
|
|
|
}
|
2019-07-14 16:20:47 +03:00
|
|
|
/* fall through */
|
2015-11-20 03:17:25 +03:00
|
|
|
case imemo_ment:
|
|
|
|
rb_bug("cref_replace_with_duplicated_cref_each_frame: unreachable");
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2022-09-14 08:17:56 +03:00
|
|
|
return NULL;
|
2015-11-20 03:17:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static rb_cref_t *
|
|
|
|
vm_cref_replace_with_duplicated_cref(const VALUE *ep)
|
|
|
|
{
|
|
|
|
if (vm_env_cref_by_cref(ep)) {
|
|
|
|
rb_cref_t *cref;
|
2016-07-28 14:02:30 +03:00
|
|
|
VALUE envval;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
while (!VM_ENV_LOCAL_P(ep)) {
|
|
|
|
envval = VM_ENV_ESCAPED_P(ep) ? VM_ENV_ENVVAL(ep) : Qfalse;
|
|
|
|
if ((cref = cref_replace_with_duplicated_cref_each_frame(&ep[VM_ENV_DATA_INDEX_ME_CREF], FALSE, envval)) != NULL) {
|
2015-11-20 03:17:25 +03:00
|
|
|
return cref;
|
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
2015-11-20 03:17:25 +03:00
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
envval = VM_ENV_ESCAPED_P(ep) ? VM_ENV_ENVVAL(ep) : Qfalse;
|
|
|
|
return cref_replace_with_duplicated_cref_each_frame(&ep[VM_ENV_DATA_INDEX_ME_CREF], TRUE, envval);
|
2015-11-20 03:17:25 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_bug("vm_cref_dup: unreachable");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-02 07:20:30 +03:00
|
|
|
static rb_cref_t *
|
2019-04-05 11:15:11 +03:00
|
|
|
vm_get_cref(const VALUE *ep)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2015-06-02 07:20:30 +03:00
|
|
|
rb_cref_t *cref = vm_env_cref(ep);
|
2011-12-26 18:20:09 +04:00
|
|
|
|
2015-10-30 00:47:05 +03:00
|
|
|
if (cref != NULL) {
|
|
|
|
return cref;
|
|
|
|
}
|
|
|
|
else {
|
2019-04-05 11:15:21 +03:00
|
|
|
rb_bug("vm_get_cref: unreachable");
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-29 23:06:40 +03:00
|
|
|
rb_cref_t *
|
|
|
|
rb_vm_get_cref(const VALUE *ep)
|
|
|
|
{
|
|
|
|
return vm_get_cref(ep);
|
|
|
|
}
|
|
|
|
|
2019-04-05 11:15:11 +03:00
|
|
|
static rb_cref_t *
|
|
|
|
vm_ec_cref(const rb_execution_context_t *ec)
|
|
|
|
{
|
|
|
|
const rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(ec, ec->cfp);
|
|
|
|
|
|
|
|
if (cfp == NULL) {
|
|
|
|
return NULL;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2019-04-05 11:15:11 +03:00
|
|
|
return vm_get_cref(cfp->ep);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-08-18 08:17:39 +04:00
|
|
|
|
2015-10-30 01:43:45 +03:00
|
|
|
static const rb_cref_t *
|
|
|
|
vm_get_const_key_cref(const VALUE *ep)
|
|
|
|
{
|
2019-04-05 11:15:11 +03:00
|
|
|
const rb_cref_t *cref = vm_get_cref(ep);
|
2015-10-30 01:43:45 +03:00
|
|
|
const rb_cref_t *key_cref = cref;
|
|
|
|
|
|
|
|
while (cref) {
|
2024-03-06 19:04:22 +03:00
|
|
|
if (RCLASS_SINGLETON_P(CREF_CLASS(cref)) ||
|
2023-04-14 23:25:06 +03:00
|
|
|
RCLASS_EXT(CREF_CLASS(cref))->cloned) {
|
2019-08-09 05:00:34 +03:00
|
|
|
return key_cref;
|
2015-10-30 01:43:45 +03:00
|
|
|
}
|
|
|
|
cref = CREF_NEXT(cref);
|
|
|
|
}
|
|
|
|
|
2015-12-14 05:51:13 +03:00
|
|
|
/* does not include singleton class */
|
2015-10-30 01:43:45 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-02-22 10:05:14 +03:00
|
|
|
void
|
2015-06-03 22:12:26 +03:00
|
|
|
rb_vm_rewrite_cref(rb_cref_t *cref, VALUE old_klass, VALUE new_klass, rb_cref_t **new_cref_ptr)
|
2015-02-22 10:05:14 +03:00
|
|
|
{
|
2015-06-03 22:12:26 +03:00
|
|
|
rb_cref_t *new_cref;
|
|
|
|
|
|
|
|
while (cref) {
|
|
|
|
if (CREF_CLASS(cref) == old_klass) {
|
2015-11-13 23:02:19 +03:00
|
|
|
new_cref = vm_cref_new_use_prev(new_klass, METHOD_VISI_UNDEF, FALSE, cref, FALSE);
|
2015-06-03 22:12:26 +03:00
|
|
|
*new_cref_ptr = new_cref;
|
2015-02-22 10:05:14 +03:00
|
|
|
return;
|
|
|
|
}
|
2015-11-13 23:02:19 +03:00
|
|
|
new_cref = vm_cref_new_use_prev(CREF_CLASS(cref), METHOD_VISI_UNDEF, FALSE, cref, FALSE);
|
2015-06-03 22:12:26 +03:00
|
|
|
cref = CREF_NEXT(cref);
|
|
|
|
*new_cref_ptr = new_cref;
|
2021-10-19 11:09:32 +03:00
|
|
|
new_cref_ptr = &new_cref->next;
|
2015-02-22 10:05:14 +03:00
|
|
|
}
|
|
|
|
*new_cref_ptr = NULL;
|
|
|
|
}
|
|
|
|
|
2015-03-09 00:22:43 +03:00
|
|
|
static rb_cref_t *
|
2021-12-03 02:53:39 +03:00
|
|
|
vm_cref_push(const rb_execution_context_t *ec, VALUE klass, const VALUE *ep, int pushed_by_eval, int singleton)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2015-11-13 23:02:19 +03:00
|
|
|
rb_cref_t *prev_cref = NULL;
|
2007-12-09 08:56:00 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
if (ep) {
|
|
|
|
prev_cref = vm_env_cref(ep);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
else {
|
2017-10-27 04:31:15 +03:00
|
|
|
rb_control_frame_t *cfp = vm_get_ruby_level_caller_cfp(ec, ec->cfp);
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
|
|
|
|
if (cfp) {
|
2015-06-02 07:20:30 +03:00
|
|
|
prev_cref = vm_env_cref(cfp->ep);
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
|
2021-12-03 02:53:39 +03:00
|
|
|
return vm_cref_new(klass, METHOD_VISI_PUBLIC, FALSE, prev_cref, pushed_by_eval, singleton);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
vm_get_cbase(const VALUE *ep)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2019-04-05 11:15:11 +03:00
|
|
|
const rb_cref_t *cref = vm_get_cref(ep);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
|
2021-12-03 02:53:39 +03:00
|
|
|
return CREF_CLASS_FOR_DEFINITION(cref);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple method invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
* fix namespace issue on singleton class expressions. [Bug #10943]
* vm_core.h, method.h: remove rb_iseq_t::cref_stack. CREF is stored
to rb_method_definition_t::body.iseq_body.cref.
* vm_insnhelper.c: modify SVAR usage.
When calling ISEQ type method, push CREF information onto method
frame, SVAR located place. Before this fix, SVAR is simply nil.
After this patch, CREF (or NULL == Qfalse for not iseq methods)
is stored at the method invocation.
When SVAR is requierd, then put NODE_IF onto SVAR location,
and NDOE_IF::nd_reserved points CREF itself.
* vm.c (vm_cref_new, vm_cref_dump, vm_cref_new_toplevel): added.
* vm_insnhelper.c (vm_push_frame): accept CREF.
* method.h, vm_method.c (rb_add_method_iseq): added. This function
accepts iseq and CREF.
* class.c (clone_method): use rb_add_method_iseq().
* gc.c (mark_method_entry): mark method_entry::body.iseq_body.cref.
* iseq.c: remove CREF related codes.
* insns.def (getinlinecache/setinlinecache): CREF should be cache key
because a different CREF has a different namespace.
* node.c (rb_gc_mark_node): mark NODE_IF::nd_reserved for SVAR.
* proc.c: catch up changes.
* struct.c: ditto.
* insns.def: ditto.
* vm_args.c (raise_argument_error): ditto.
* vm_eval.c: ditto.
* test/ruby/test_class.rb: add a test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@49874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-03-06 15:24:58 +03:00
|
|
|
vm_get_const_base(const VALUE *ep)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2019-04-05 11:15:11 +03:00
|
|
|
const rb_cref_t *cref = vm_get_cref(ep);
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
while (cref) {
|
2021-12-03 02:53:39 +03:00
|
|
|
if (!CREF_PUSHED_BY_EVAL(cref)) {
|
|
|
|
return CREF_CLASS_FOR_DEFINITION(cref);
|
|
|
|
}
|
|
|
|
cref = CREF_NEXT(cref);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2021-12-03 02:53:39 +03:00
|
|
|
return Qundef;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline void
|
|
|
|
vm_check_if_namespace(VALUE klass)
|
|
|
|
{
|
|
|
|
if (!RB_TYPE_P(klass, T_CLASS) && !RB_TYPE_P(klass, T_MODULE)) {
|
2016-01-24 13:36:16 +03:00
|
|
|
rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a class/module", klass);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-08 07:44:51 +03:00
|
|
|
static inline void
|
|
|
|
vm_ensure_not_refinement_module(VALUE self)
|
|
|
|
{
|
|
|
|
if (RB_TYPE_P(self, T_MODULE) && FL_TEST(self, RMODULE_IS_REFINEMENT)) {
|
|
|
|
rb_warn("not defined at the refinement, but at the outer class/module");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-06-24 21:19:22 +04:00
|
|
|
static inline VALUE
|
2021-06-01 20:34:06 +03:00
|
|
|
vm_get_iclass(const rb_control_frame_t *cfp, VALUE klass)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
return klass;
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
2019-11-25 09:05:53 +03:00
|
|
|
vm_get_ev_const(rb_execution_context_t *ec, VALUE orig_klass, ID id, bool allow_nil, int is_defined)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2016-05-15 04:57:28 +03:00
|
|
|
void rb_const_warn_if_deprecated(const rb_const_entry_t *ce, VALUE klass, ID id);
|
2012-10-14 23:58:59 +04:00
|
|
|
VALUE val;
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(orig_klass) && allow_nil) {
|
2012-10-14 23:58:59 +04:00
|
|
|
/* in current lexical scope */
|
2019-04-05 11:15:21 +03:00
|
|
|
const rb_cref_t *root_cref = vm_get_cref(ec->cfp->ep);
|
2015-03-09 00:22:43 +03:00
|
|
|
const rb_cref_t *cref;
|
2016-03-31 11:49:09 +03:00
|
|
|
VALUE klass = Qnil;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-03-08 22:50:37 +03:00
|
|
|
while (root_cref && CREF_PUSHED_BY_EVAL(root_cref)) {
|
|
|
|
root_cref = CREF_NEXT(root_cref);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
cref = root_cref;
|
2015-03-08 22:50:37 +03:00
|
|
|
while (cref && CREF_NEXT(cref)) {
|
|
|
|
if (CREF_PUSHED_BY_EVAL(cref)) {
|
2012-10-14 23:58:59 +04:00
|
|
|
klass = Qnil;
|
|
|
|
}
|
|
|
|
else {
|
2015-03-08 22:50:37 +03:00
|
|
|
klass = CREF_CLASS(cref);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2015-03-08 22:50:37 +03:00
|
|
|
cref = CREF_NEXT(cref);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (!NIL_P(klass)) {
|
|
|
|
VALUE av, am = 0;
|
2014-08-04 05:12:53 +04:00
|
|
|
rb_const_entry_t *ce;
|
2012-10-14 23:58:59 +04:00
|
|
|
search_continue:
|
2014-08-04 05:12:53 +04:00
|
|
|
if ((ce = rb_const_lookup(klass, id))) {
|
2016-05-15 04:57:28 +03:00
|
|
|
rb_const_warn_if_deprecated(ce, klass, id);
|
2014-08-04 05:12:53 +04:00
|
|
|
val = ce->value;
|
2022-11-15 07:24:08 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2012-10-14 23:58:59 +04:00
|
|
|
if (am == klass) break;
|
|
|
|
am = klass;
|
|
|
|
if (is_defined) return 1;
|
2018-02-13 12:34:43 +03:00
|
|
|
if (rb_autoloading_value(klass, id, &av, NULL)) return av;
|
2012-10-14 23:58:59 +04:00
|
|
|
rb_autoload_load(klass, id);
|
|
|
|
goto search_continue;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (is_defined) {
|
|
|
|
return 1;
|
|
|
|
}
|
2020-03-09 20:22:11 +03:00
|
|
|
else {
|
|
|
|
if (UNLIKELY(!rb_ractor_main_p())) {
|
|
|
|
if (!rb_ractor_shareable_p(val)) {
|
2020-12-21 12:06:28 +03:00
|
|
|
rb_raise(rb_eRactorIsolationError,
|
2020-10-26 01:56:10 +03:00
|
|
|
"can not access non-shareable objects in constant %"PRIsVALUE"::%s by non-main ractor.", rb_class_path(klass), rb_id2name(id));
|
2020-03-09 20:22:11 +03:00
|
|
|
}
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
return val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
/* search self */
|
2015-03-08 22:50:37 +03:00
|
|
|
if (root_cref && !NIL_P(CREF_CLASS(root_cref))) {
|
2017-10-27 08:18:58 +03:00
|
|
|
klass = vm_get_iclass(ec->cfp, CREF_CLASS(root_cref));
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
else {
|
2017-10-27 08:18:58 +03:00
|
|
|
klass = CLASS_OF(ec->cfp->self);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (is_defined) {
|
|
|
|
return rb_const_defined(klass, id);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_const_get(klass, id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
vm_check_if_namespace(orig_klass);
|
|
|
|
if (is_defined) {
|
|
|
|
return rb_public_const_defined_from(orig_klass, id);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_public_const_get_from(orig_klass, id);
|
|
|
|
}
|
2010-01-24 16:52:32 +03:00
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2022-12-10 01:12:15 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_get_ev_const(rb_execution_context_t *ec, VALUE orig_klass, ID id, VALUE allow_nil)
|
|
|
|
{
|
|
|
|
return vm_get_ev_const(ec, orig_klass, id, allow_nil == Qtrue, 0);
|
|
|
|
}
|
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
static inline VALUE
|
|
|
|
vm_get_ev_const_chain(rb_execution_context_t *ec, const ID *segments)
|
|
|
|
{
|
|
|
|
VALUE val = Qnil;
|
|
|
|
int idx = 0;
|
|
|
|
int allow_nil = TRUE;
|
|
|
|
if (segments[0] == idNULL) {
|
|
|
|
val = rb_cObject;
|
|
|
|
idx++;
|
|
|
|
allow_nil = FALSE;
|
|
|
|
}
|
|
|
|
while (segments[idx]) {
|
|
|
|
ID id = segments[idx++];
|
|
|
|
val = vm_get_ev_const(ec, val, id, allow_nil, 0);
|
|
|
|
allow_nil = FALSE;
|
|
|
|
}
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-06-24 21:19:22 +04:00
|
|
|
static inline VALUE
|
2021-06-01 20:34:06 +03:00
|
|
|
vm_get_cvar_base(const rb_cref_t *cref, const rb_control_frame_t *cfp, int top_level_raise)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2012-10-14 23:58:59 +04:00
|
|
|
VALUE klass;
|
2007-06-24 21:19:22 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (!cref) {
|
|
|
|
rb_bug("vm_get_cvar_base: no cref");
|
|
|
|
}
|
2011-06-30 17:34:53 +04:00
|
|
|
|
2015-03-08 22:50:37 +03:00
|
|
|
while (CREF_NEXT(cref) &&
|
2024-03-06 19:04:22 +03:00
|
|
|
(NIL_P(CREF_CLASS(cref)) || RCLASS_SINGLETON_P(CREF_CLASS(cref)) ||
|
2021-12-03 02:53:39 +03:00
|
|
|
CREF_PUSHED_BY_EVAL(cref) || CREF_SINGLETON(cref))) {
|
2015-03-08 22:50:37 +03:00
|
|
|
cref = CREF_NEXT(cref);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2020-03-28 01:08:52 +03:00
|
|
|
if (top_level_raise && !CREF_NEXT(cref)) {
|
|
|
|
rb_raise(rb_eRuntimeError, "class variable access from toplevel");
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2011-06-30 17:34:53 +04:00
|
|
|
|
2015-03-08 22:50:37 +03:00
|
|
|
klass = vm_get_iclass(cfp, CREF_CLASS(cref));
|
2011-06-30 17:34:53 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (NIL_P(klass)) {
|
|
|
|
rb_raise(rb_eTypeError, "no class variables available");
|
|
|
|
}
|
|
|
|
return klass;
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
ALWAYS_INLINE(static void fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, attr_index_t index, shape_id_t shape_id));
/*
 * Store an (index, shape_id) pair in the cache used for this access:
 * the call cache `cc` when `is_attr` is non-zero, otherwise the inline
 * cache `ic` belonging to `iseq`.
 */
static inline void
fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, attr_index_t index, shape_id_t shape_id)
{
    if (!is_attr) {
        vm_ic_attr_index_set(iseq, ic, index, shape_id);
        return;
    }

    vm_cc_attr_index_set(cc, index, shape_id);
}
|
|
|
|
|
2022-10-01 09:58:47 +03:00
|
|
|
/* True unless `cond` holds and `val` is not ractor-shareable. */
#define ractor_incidental_shareable_p(cond, val) \
    (!(cond) || rb_ractor_shareable_p(val))
/* Same check, with the condition being "obj itself is ractor-shareable". */
#define ractor_object_incidental_shareable_p(obj, val) \
    ractor_incidental_shareable_p(rb_ractor_shareable_p(obj), val)

/*
 * Sentinel index (all bits set) compared against cached ivar indexes to
 * mean "no index recorded".  The expansion is fully parenthesized so it
 * stays a single operand in any context (e.g. `sizeof ATTR_INDEX_NOT_SET`).
 */
#define ATTR_INDEX_NOT_SET ((attr_index_t)-1)
|
|
|
|
|
2023-02-28 15:44:57 +03:00
|
|
|
ALWAYS_INLINE(static VALUE vm_getivar(VALUE, ID, const rb_iseq_t *, IVC, const struct rb_callcache *, int, VALUE));
|
2016-08-13 04:21:29 +03:00
|
|
|
static inline VALUE
|
2023-02-28 15:44:57 +03:00
|
|
|
vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, VALUE default_value)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2018-10-14 14:25:36 +03:00
|
|
|
#if OPT_IC_FOR_IVAR
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
VALUE val = Qundef;
|
2022-10-03 18:14:32 +03:00
|
|
|
shape_id_t shape_id;
|
|
|
|
VALUE * ivar_list;
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
|
|
|
|
if (SPECIAL_CONST_P(obj)) {
|
2023-02-28 15:44:57 +03:00
|
|
|
return default_value;
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
}
|
2020-12-11 10:37:20 +03:00
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
#if SHAPE_IN_BASIC_FLAGS
|
|
|
|
shape_id = RBASIC_SHAPE_ID(obj);
|
|
|
|
#endif
|
2020-12-11 10:37:20 +03:00
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
switch (BUILTIN_TYPE(obj)) {
|
2022-10-12 12:27:23 +03:00
|
|
|
case T_OBJECT:
|
|
|
|
ivar_list = ROBJECT_IVPTR(obj);
|
|
|
|
VM_ASSERT(rb_ractor_shareable_p(obj) ? rb_ractor_shareable_p(val) : true);
|
2022-09-23 20:54:42 +03:00
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
#if !SHAPE_IN_BASIC_FLAGS
|
2022-10-12 12:27:23 +03:00
|
|
|
shape_id = ROBJECT_SHAPE_ID(obj);
|
2022-10-03 18:14:32 +03:00
|
|
|
#endif
|
2022-10-12 12:27:23 +03:00
|
|
|
break;
|
|
|
|
case T_CLASS:
|
|
|
|
case T_MODULE:
|
|
|
|
{
|
2022-11-01 00:05:37 +03:00
|
|
|
if (UNLIKELY(!rb_ractor_main_p())) {
|
|
|
|
// For two reasons we can only use the fast path on the main
|
|
|
|
// ractor.
|
|
|
|
// First, only the main ractor is allowed to set ivars on classes
|
|
|
|
// and modules. So we can skip locking.
|
|
|
|
// Second, other ractors need to check the shareability of the
|
|
|
|
// values returned from the class ivars.
|
2024-07-10 23:59:14 +03:00
|
|
|
|
|
|
|
if (default_value == Qundef) { // defined?
|
|
|
|
return rb_ivar_defined(obj, id) ? Qtrue : Qundef;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
goto general_path;
|
|
|
|
}
|
2022-11-01 00:05:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
ivar_list = RCLASS_IVPTR(obj);
|
|
|
|
|
|
|
|
#if !SHAPE_IN_BASIC_FLAGS
|
|
|
|
shape_id = RCLASS_SHAPE_ID(obj);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
break;
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
default:
|
|
|
|
if (FL_TEST_RAW(obj, FL_EXIVAR)) {
|
|
|
|
struct gen_ivtbl *ivtbl;
|
|
|
|
rb_gen_ivtbl_get(obj, id, &ivtbl);
|
2022-10-03 18:14:32 +03:00
|
|
|
#if !SHAPE_IN_BASIC_FLAGS
|
2022-10-12 12:27:23 +03:00
|
|
|
shape_id = ivtbl->shape_id;
|
2022-10-03 18:14:32 +03:00
|
|
|
#endif
|
2023-10-31 16:38:35 +03:00
|
|
|
ivar_list = ivtbl->as.shape.ivptr;
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
2023-02-28 15:44:57 +03:00
|
|
|
return default_value;
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
shape_id_t cached_id;
|
2022-10-03 20:52:40 +03:00
|
|
|
attr_index_t index;
|
2022-09-23 20:54:42 +03:00
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
if (is_attr) {
|
2022-10-12 13:38:29 +03:00
|
|
|
vm_cc_atomic_shape_and_index(cc, &cached_id, &index);
|
2022-09-23 20:54:42 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 20:52:40 +03:00
|
|
|
vm_ic_atomic_shape_and_index(ic, &cached_id, &index);
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
2022-09-23 20:54:42 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
if (LIKELY(cached_id == shape_id)) {
|
2022-12-09 01:16:52 +03:00
|
|
|
RUBY_ASSERT(cached_id != OBJ_TOO_COMPLEX_SHAPE_ID);
|
|
|
|
|
2022-10-03 20:52:40 +03:00
|
|
|
if (index == ATTR_INDEX_NOT_SET) {
|
2023-02-28 15:44:57 +03:00
|
|
|
return default_value;
|
2020-12-11 10:37:20 +03:00
|
|
|
}
|
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
val = ivar_list[index];
|
2023-02-15 01:41:23 +03:00
|
|
|
#if USE_DEBUG_COUNTER
|
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_ic_hit);
|
|
|
|
|
|
|
|
if (RB_TYPE_P(obj, T_OBJECT)) {
|
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_obj_hit);
|
|
|
|
}
|
|
|
|
#endif
|
2022-11-15 07:24:08 +03:00
|
|
|
RUBY_ASSERT(!UNDEF_P(val));
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
else { // cache miss case
|
2023-02-15 01:41:23 +03:00
|
|
|
#if USE_DEBUG_COUNTER
|
2022-10-03 18:14:32 +03:00
|
|
|
if (is_attr) {
|
|
|
|
if (cached_id != INVALID_SHAPE_ID) {
|
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_cc_miss_set);
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 18:14:32 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_cc_miss_unset);
|
2020-06-22 05:07:26 +03:00
|
|
|
}
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 18:14:32 +03:00
|
|
|
if (cached_id != INVALID_SHAPE_ID) {
|
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_ic_miss_set);
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 18:14:32 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_ic_miss_unset);
|
|
|
|
}
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
}
|
2023-02-15 01:41:23 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_ic_miss);
|
|
|
|
|
|
|
|
if (RB_TYPE_P(obj, T_OBJECT)) {
|
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_obj_miss);
|
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
#endif
|
|
|
|
|
2022-12-09 01:16:52 +03:00
|
|
|
if (shape_id == OBJ_TOO_COMPLEX_SHAPE_ID) {
|
2023-11-02 19:38:24 +03:00
|
|
|
st_table *table = NULL;
|
|
|
|
switch (BUILTIN_TYPE(obj)) {
|
|
|
|
case T_CLASS:
|
|
|
|
case T_MODULE:
|
|
|
|
table = (st_table *)RCLASS_IVPTR(obj);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case T_OBJECT:
|
|
|
|
table = ROBJECT_IV_HASH(obj);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default: {
|
|
|
|
struct gen_ivtbl *ivtbl;
|
|
|
|
if (rb_gen_ivtbl_get(obj, 0, &ivtbl)) {
|
|
|
|
table = ivtbl->as.complex.table;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!table || !st_lookup(table, id, &val)) {
|
2023-02-28 15:44:57 +03:00
|
|
|
val = default_value;
|
2022-12-09 01:16:52 +03:00
|
|
|
}
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
}
|
|
|
|
else {
|
2023-10-26 12:08:05 +03:00
|
|
|
shape_id_t previous_cached_id = cached_id;
|
|
|
|
if (rb_shape_get_iv_index_with_hint(shape_id, id, &index, &cached_id)) {
|
2022-12-09 01:16:52 +03:00
|
|
|
// This fills in the cache with the shared cache object.
|
|
|
|
// "ent" is the shared cache object
|
2023-10-26 12:08:05 +03:00
|
|
|
if (cached_id != previous_cached_id) {
|
|
|
|
fill_ivar_cache(iseq, ic, cc, is_attr, index, cached_id);
|
|
|
|
}
|
2022-12-09 01:16:52 +03:00
|
|
|
|
2023-10-26 12:08:05 +03:00
|
|
|
if (index == ATTR_INDEX_NOT_SET) {
|
|
|
|
val = default_value;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// We fetched the ivar list above
|
|
|
|
val = ivar_list[index];
|
|
|
|
RUBY_ASSERT(!UNDEF_P(val));
|
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-12-09 01:16:52 +03:00
|
|
|
if (is_attr) {
|
|
|
|
vm_cc_attr_index_initialize(cc, shape_id);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
vm_ic_attr_index_initialize(ic, shape_id);
|
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2023-02-28 15:44:57 +03:00
|
|
|
val = default_value;
|
2022-12-09 01:16:52 +03:00
|
|
|
}
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2017-03-15 11:25:58 +03:00
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (!UNDEF_P(default_value)) {
|
2023-02-28 15:44:57 +03:00
|
|
|
RUBY_ASSERT(!UNDEF_P(val));
|
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
|
|
|
|
return val;
|
|
|
|
|
|
|
|
general_path:
|
2018-10-14 14:25:36 +03:00
|
|
|
#endif /* OPT_IC_FOR_IVAR */
|
2017-03-15 11:25:58 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_get_ic_miss);
|
2017-02-21 11:18:15 +03:00
|
|
|
|
fastpath for ivar read of FL_EXIVAR objects.
vm_getivar() provides fastpath for T_OBJECT by caching an index
of ivar. This patch also provides fastpath for FL_EXIVAR objects.
FL_EXIVAR objects have an each ivar array and index can be cached
as T_OBJECT. To access this ivar array, generic_iv_tbl is exposed
by rb_ivar_generic_ivtbl() (declared in variable.h which is newly
introduced).
Benchmark script:
Benchmark.driver(repeat_count: 3){|x|
x.executable name: 'clean', command: %w'../clean/miniruby'
x.executable name: 'trunk', command: %w'./miniruby'
objs = [Object.new, 'str', {a: 1, b: 2}, [1, 2]]
objs.each.with_index{|obj, i|
rep = obj.inspect
rep = 'Object.new' if /\#/ =~ rep
x.prelude str = %Q{
v#{i} = #{rep}
def v#{i}.foo
@iv # ivar access method (attr_reader)
end
v#{i}.instance_variable_set(:@iv, :iv)
}
puts str
x.report %Q{
v#{i}.foo
}
}
}
Result:
v0.foo # T_OBJECT
clean: 85387141.8 i/s
trunk: 85249373.6 i/s - 1.00x slower
v1.foo # T_STRING
trunk: 57894407.5 i/s
clean: 39957178.6 i/s - 1.45x slower
v2.foo # T_HASH
trunk: 56629413.2 i/s
clean: 39227088.9 i/s - 1.44x slower
v3.foo # T_ARRAY
trunk: 55797530.2 i/s
clean: 38263572.9 i/s - 1.46x slower
2019-11-28 21:02:44 +03:00
|
|
|
if (is_attr) {
|
|
|
|
return rb_attr_get(obj, id);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_ivar_get(obj, id);
|
|
|
|
}
|
2010-09-23 04:01:40 +04:00
|
|
|
}
|
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
static void
|
2022-10-03 20:52:40 +03:00
|
|
|
populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr)
|
2022-10-03 18:14:32 +03:00
|
|
|
{
|
2022-12-09 01:16:52 +03:00
|
|
|
RUBY_ASSERT(next_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID);
|
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
// Cache population code
|
|
|
|
if (is_attr) {
|
2022-10-12 13:38:29 +03:00
|
|
|
vm_cc_attr_index_set(cc, index, next_shape_id);
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 20:52:40 +03:00
|
|
|
vm_ic_attr_index_set(iseq, ic, index, next_shape_id);
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-16 08:13:32 +03:00
|
|
|
ALWAYS_INLINE(static VALUE vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr));
NOINLINE(static VALUE vm_setivar_slowpath_ivar(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic));
NOINLINE(static VALUE vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache *cc));

/*
 * Ivar write used after the cached fast path did not apply.
 *
 * For T_OBJECT (when OPT_IC_FOR_IVAR is enabled) the frozen check is done,
 * the value is stored with rb_obj_ivar_set(), and the cache is refreshed
 * with the returned index and the object's shape after the write, unless
 * that shape is the too-complex shape.  Everything else goes through
 * rb_ivar_set().
 *
 * Returns `val` on the T_OBJECT path, otherwise rb_ivar_set()'s result.
 */
static VALUE
vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr)
{
#if OPT_IC_FOR_IVAR
    RB_DEBUG_COUNTER_INC(ivar_set_ic_miss);

    if (BUILTIN_TYPE(obj) != T_OBJECT) {
        return rb_ivar_set(obj, id, val);
    }

    rb_check_frozen(obj);

    const attr_index_t slot = rb_obj_ivar_set(obj, id, val);
    const shape_id_t shape_after = ROBJECT_SHAPE_ID(obj);

    if (shape_after != OBJ_TOO_COMPLEX_SHAPE_ID) {
        populate_cache(slot, shape_after, id, iseq, ic, cc, is_attr);
    }

    RB_DEBUG_COUNTER_INC(ivar_set_obj_miss);
    return val;
#else
    return rb_ivar_set(obj, id, val);
#endif
}

/* Slow-path ivar write keyed on an inline cache (no call cache). */
static VALUE
vm_setivar_slowpath_ivar(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic)
{
    const struct rb_callcache *const no_cc = NULL;

    return vm_setivar_slowpath(obj, id, val, iseq, ic, no_cc, false);
}

/* Slow-path ivar write keyed on a call cache (no iseq / inline cache). */
static VALUE
vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache *cc)
{
    const rb_iseq_t *const no_iseq = NULL;

    return vm_setivar_slowpath(obj, id, val, no_iseq, NULL, cc, true);
}
|
|
|
|
|
2022-10-03 20:52:40 +03:00
|
|
|
NOINLINE(static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index));
/*
 * Cached ivar write into the generic ivar table (gen_ivtbl) of `obj`.
 *
 * The write proceeds when the object's current shape equals
 * `dest_shape_id`, or when `dest_shape_id` is a valid shape whose parent is
 * the current shape, whose edge name is `id`, and whose capacity equals the
 * current shape's capacity.  In the second case the object's shape id is
 * updated to `dest_shape_id` before storing.
 *
 * Returns `val` after storing it at `index`, or Qundef when neither
 * condition holds (nothing is written in that case).
 */
static VALUE
vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)
{
    struct gen_ivtbl *ivtbl = 0;
#if SHAPE_IN_BASIC_FLAGS
    const shape_id_t shape_id = RBASIC_SHAPE_ID(obj);
#else
    const shape_id_t shape_id = rb_generic_shape_id(obj);
#endif
    const bool transition = (shape_id != dest_shape_id);

    if (!transition) {
        // Cache hit case
        RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID);
    }
    else {
        if (dest_shape_id == INVALID_SHAPE_ID) {
            return Qundef;
        }

        rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
        rb_shape_t *dest_shape = rb_shape_get_shape_by_id(dest_shape_id);

        if (shape_id != dest_shape->parent_id ||
            dest_shape->edge_name != id ||
            shape->capacity != dest_shape->capacity) {
            return Qundef;
        }

        RUBY_ASSERT(index < dest_shape->capacity);
    }

    rb_gen_ivtbl_get(obj, 0, &ivtbl);

    if (transition) {
#if SHAPE_IN_BASIC_FLAGS
        RBASIC_SET_SHAPE_ID(obj, dest_shape_id);
#else
        ivtbl->shape_id = dest_shape_id;
#endif
    }

    RB_OBJ_WRITE(obj, &ivtbl->as.shape.ivptr[index], val);

    RB_DEBUG_COUNTER_INC(ivar_set_ic_hit);

    return val;
}
|
|
|
|
|
2016-04-24 01:30:42 +03:00
|
|
|
static inline VALUE
|
2022-10-03 20:52:40 +03:00
|
|
|
vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)
|
2010-09-23 04:01:40 +04:00
|
|
|
{
|
2018-10-14 14:25:36 +03:00
|
|
|
#if OPT_IC_FOR_IVAR
|
2022-10-03 18:14:32 +03:00
|
|
|
switch (BUILTIN_TYPE(obj)) {
|
|
|
|
case T_OBJECT:
|
2022-10-12 12:27:23 +03:00
|
|
|
{
|
|
|
|
VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj));
|
2022-10-03 20:52:40 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
shape_id_t shape_id = ROBJECT_SHAPE_ID(obj);
|
2022-12-09 01:16:52 +03:00
|
|
|
RUBY_ASSERT(dest_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID);
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
if (LIKELY(shape_id == dest_shape_id)) {
|
|
|
|
RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID);
|
|
|
|
VM_ASSERT(!rb_ractor_shareable_p(obj));
|
|
|
|
}
|
|
|
|
else if (dest_shape_id != INVALID_SHAPE_ID) {
|
2023-11-11 00:17:39 +03:00
|
|
|
rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
|
2022-10-12 12:27:23 +03:00
|
|
|
rb_shape_t *dest_shape = rb_shape_get_shape_by_id(dest_shape_id);
|
|
|
|
shape_id_t source_shape_id = dest_shape->parent_id;
|
2022-11-08 23:35:31 +03:00
|
|
|
|
2023-11-11 00:17:39 +03:00
|
|
|
if (shape_id == source_shape_id && dest_shape->edge_name == id && shape->capacity == dest_shape->capacity) {
|
2022-10-03 18:14:32 +03:00
|
|
|
RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID);
|
2022-10-03 20:52:40 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
ROBJECT_SET_SHAPE_ID(obj, dest_shape_id);
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2022-11-08 23:35:31 +03:00
|
|
|
RUBY_ASSERT(rb_shape_get_next_iv_shape(rb_shape_get_shape_by_id(source_shape_id), id) == dest_shape);
|
2022-11-08 22:09:43 +03:00
|
|
|
RUBY_ASSERT(index < dest_shape->capacity);
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 20:52:40 +03:00
|
|
|
break;
|
|
|
|
}
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
2020-12-11 10:37:20 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
VALUE *ptr = ROBJECT_IVPTR(obj);
|
* common.mk: clean up
- remove blockinlining.$(OBJEXT) to built
- make ENCODING_H_INCLDUES variable (include/ruby/encoding.h)
- make VM_CORE_H_INCLUDES variable (vm_core.h)
- simplify rules.
- make depends rule to output depend status using gcc -MM.
* include/ruby/mvm.h, include/ruby/vm.h: rename mvm.h to vm.h.
* include/ruby.h: ditto.
* load.c: add inclusion explicitly.
* enumerator.c, object.c, parse.y, thread.c, vm_dump.c:
remove useless inclusion.
* eval_intern.h: cleanup inclusion.
* vm_core.h: rb_thread_t should be defined in this file.
* vm_evalbody.c, vm_exec.c: rename vm_evalbody.c to vm_exec.c.
* vm.h, vm_exec.h: rename vm.h to vm_exec.h.
* insnhelper.h, vm_insnhelper.h: rename insnhelper.h to vm_insnhelper.h.
* vm.c, vm_insnhelper.c, vm_insnhelper.h:
- rename vm_eval() to vm_exec_core().
- rename vm_eval_body() to vm_exec().
- cleanup include order.
* vm_method.c: fix comment.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19466 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-09-23 04:20:28 +04:00
|
|
|
|
2022-12-09 01:16:52 +03:00
|
|
|
RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
|
2022-10-12 12:27:23 +03:00
|
|
|
RB_OBJ_WRITE(obj, &ptr[index], val);
|
2022-10-03 18:14:32 +03:00
|
|
|
|
2022-10-12 12:27:23 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_set_ic_hit);
|
2023-02-15 01:41:23 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_set_obj_hit);
|
2022-10-12 12:27:23 +03:00
|
|
|
return val;
|
|
|
|
}
|
|
|
|
break;
|
2022-10-03 18:14:32 +03:00
|
|
|
case T_CLASS:
|
|
|
|
case T_MODULE:
|
2017-03-15 11:25:58 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ivar_set_ic_miss_noobject);
|
2022-10-03 18:14:32 +03:00
|
|
|
default:
|
|
|
|
break;
|
2017-03-15 11:25:58 +03:00
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
|
|
|
|
return Qundef;
|
2018-10-14 14:25:36 +03:00
|
|
|
#endif /* OPT_IC_FOR_IVAR */
|
2020-12-16 06:03:36 +03:00
|
|
|
}
|
|
|
|
|
2021-06-23 03:39:04 +03:00
|
|
|
static VALUE
|
2023-06-02 21:25:19 +03:00
|
|
|
update_classvariable_cache(const rb_iseq_t *iseq, VALUE klass, ID id, const rb_cref_t * cref, ICVARC ic)
|
2021-06-01 20:34:06 +03:00
|
|
|
{
|
|
|
|
VALUE defined_class = 0;
|
|
|
|
VALUE cvar_value = rb_cvar_find(klass, id, &defined_class);
|
|
|
|
|
|
|
|
if (RB_TYPE_P(defined_class, T_ICLASS)) {
|
|
|
|
defined_class = RBASIC(defined_class)->klass;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct rb_id_table *rb_cvc_tbl = RCLASS_CVC_TBL(defined_class);
|
|
|
|
if (!rb_cvc_tbl) {
|
2021-06-23 03:39:04 +03:00
|
|
|
rb_bug("the cvc table should be set");
|
2021-06-01 20:34:06 +03:00
|
|
|
}
|
|
|
|
|
2021-06-23 03:39:04 +03:00
|
|
|
VALUE ent_data;
|
|
|
|
if (!rb_id_table_lookup(rb_cvc_tbl, id, &ent_data)) {
|
2021-06-01 20:34:06 +03:00
|
|
|
rb_bug("should have cvar cache entry");
|
|
|
|
}
|
|
|
|
|
2021-06-23 03:39:04 +03:00
|
|
|
struct rb_cvar_class_tbl_entry *ent = (void *)ent_data;
|
2023-02-07 23:46:50 +03:00
|
|
|
|
2023-06-02 21:25:19 +03:00
|
|
|
ent->global_cvar_state = GET_GLOBAL_CVAR_STATE();
|
|
|
|
ent->cref = cref;
|
2023-06-02 00:55:36 +03:00
|
|
|
ic->entry = ent;
|
2023-06-02 21:25:19 +03:00
|
|
|
|
|
|
|
RUBY_ASSERT(BUILTIN_TYPE((VALUE)cref) == T_IMEMO && IMEMO_TYPE_P(cref, imemo_cref));
|
|
|
|
RB_OBJ_WRITTEN(iseq, Qundef, ent->cref);
|
2021-06-01 20:34:06 +03:00
|
|
|
RB_OBJ_WRITTEN(iseq, Qundef, ent->class_value);
|
2023-06-02 22:26:02 +03:00
|
|
|
RB_OBJ_WRITTEN(ent->class_value, Qundef, ent->cref);
|
2021-06-01 20:34:06 +03:00
|
|
|
|
|
|
|
return cvar_value;
|
|
|
|
}
|
|
|
|
|
2021-06-23 03:39:04 +03:00
|
|
|
static inline VALUE
|
2021-11-18 20:11:53 +03:00
|
|
|
vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *reg_cfp, ID id, ICVARC ic)
|
2021-06-23 03:39:04 +03:00
|
|
|
{
|
2021-11-18 20:11:53 +03:00
|
|
|
const rb_cref_t *cref;
|
2023-06-02 21:25:19 +03:00
|
|
|
cref = vm_get_cref(GET_EP());
|
2021-11-18 20:11:53 +03:00
|
|
|
|
2023-06-02 21:25:19 +03:00
|
|
|
if (ic->entry && ic->entry->global_cvar_state == GET_GLOBAL_CVAR_STATE() && ic->entry->cref == cref && LIKELY(rb_ractor_main_p())) {
|
2021-06-23 03:39:04 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(cvar_read_inline_hit);
|
|
|
|
|
2022-11-01 00:05:37 +03:00
|
|
|
VALUE v = rb_ivar_lookup(ic->entry->class_value, id, Qundef);
|
2022-11-15 07:24:08 +03:00
|
|
|
RUBY_ASSERT(!UNDEF_P(v));
|
2021-12-24 06:26:21 +03:00
|
|
|
|
2022-11-01 00:05:37 +03:00
|
|
|
return v;
|
2021-06-23 03:39:04 +03:00
|
|
|
}
|
|
|
|
|
2021-11-18 20:11:53 +03:00
|
|
|
VALUE klass = vm_get_cvar_base(cref, reg_cfp, 1);
|
2021-06-23 03:39:04 +03:00
|
|
|
|
2023-06-02 21:25:19 +03:00
|
|
|
return update_classvariable_cache(iseq, klass, id, cref, ic);
|
2021-06-23 03:39:04 +03:00
|
|
|
}
|
|
|
|
|
2021-09-29 23:06:40 +03:00
|
|
|
VALUE
|
2021-11-18 20:11:53 +03:00
|
|
|
rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic)
|
2021-09-29 23:06:40 +03:00
|
|
|
{
|
2021-11-18 20:11:53 +03:00
|
|
|
return vm_getclassvariable(iseq, cfp, id, ic);
|
2021-09-29 23:06:40 +03:00
|
|
|
}
|
|
|
|
|
2021-06-01 20:34:06 +03:00
|
|
|
static inline void
|
2021-11-19 00:17:40 +03:00
|
|
|
vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *reg_cfp, ID id, VALUE val, ICVARC ic)
|
2021-06-01 20:34:06 +03:00
|
|
|
{
|
2021-11-19 00:17:40 +03:00
|
|
|
const rb_cref_t *cref;
|
2023-06-02 21:25:19 +03:00
|
|
|
cref = vm_get_cref(GET_EP());
|
2021-11-19 00:17:40 +03:00
|
|
|
|
2023-06-02 21:25:19 +03:00
|
|
|
if (ic->entry && ic->entry->global_cvar_state == GET_GLOBAL_CVAR_STATE() && ic->entry->cref == cref && LIKELY(rb_ractor_main_p())) {
|
2021-06-01 20:34:06 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(cvar_write_inline_hit);
|
|
|
|
|
|
|
|
rb_class_ivar_set(ic->entry->class_value, id, val);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-11-19 00:17:40 +03:00
|
|
|
VALUE klass = vm_get_cvar_base(cref, reg_cfp, 1);
|
2021-06-01 20:34:06 +03:00
|
|
|
|
|
|
|
rb_cvar_set(klass, id, val);
|
|
|
|
|
2023-06-02 21:25:19 +03:00
|
|
|
update_classvariable_cache(iseq, klass, id, cref, ic);
|
2021-06-01 20:34:06 +03:00
|
|
|
}
|
|
|
|
|
2021-11-23 22:09:24 +03:00
|
|
|
void
|
|
|
|
rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic)
|
|
|
|
{
|
|
|
|
vm_setclassvariable(iseq, cfp, id, val, ic);
|
|
|
|
}
|
|
|
|
|
2016-05-11 18:04:27 +03:00
|
|
|
static inline VALUE
|
2020-10-16 09:20:40 +03:00
|
|
|
vm_getinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, IVC ic)
|
2012-10-16 21:07:23 +04:00
|
|
|
{
|
2023-02-28 15:44:57 +03:00
|
|
|
return vm_getivar(obj, id, iseq, ic, NULL, FALSE, Qnil);
|
2012-10-16 21:07:23 +04:00
|
|
|
}
|
|
|
|
|
2016-05-11 18:04:27 +03:00
|
|
|
static inline void
|
2020-10-16 09:20:40 +03:00
|
|
|
vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic)
|
2012-10-16 21:07:23 +04:00
|
|
|
{
|
2023-01-16 17:32:37 +03:00
|
|
|
if (RB_SPECIAL_CONST_P(obj)) {
|
|
|
|
rb_error_frozen_object(obj);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-10-03 20:52:40 +03:00
|
|
|
shape_id_t dest_shape_id;
|
|
|
|
attr_index_t index;
|
|
|
|
vm_ic_atomic_shape_and_index(ic, &dest_shape_id, &index);
|
|
|
|
|
2022-11-15 07:24:08 +03:00
|
|
|
if (UNLIKELY(UNDEF_P(vm_setivar(obj, id, val, dest_shape_id, index)))) {
|
2022-10-03 18:14:32 +03:00
|
|
|
switch (BUILTIN_TYPE(obj)) {
|
2022-10-12 12:27:23 +03:00
|
|
|
case T_OBJECT:
|
|
|
|
case T_CLASS:
|
|
|
|
case T_MODULE:
|
|
|
|
break;
|
|
|
|
default:
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(vm_setivar_default(obj, id, val, dest_shape_id, index))) {
|
2022-10-12 12:27:23 +03:00
|
|
|
return;
|
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
vm_setivar_slowpath_ivar(obj, id, val, iseq, ic);
|
|
|
|
}
|
2012-10-16 21:07:23 +04:00
|
|
|
}
|
|
|
|
|
2021-05-11 02:43:30 +03:00
|
|
|
void
|
|
|
|
rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic)
|
|
|
|
{
|
|
|
|
vm_setinstancevariable(iseq, obj, id, val, ic);
|
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2017-10-27 04:35:12 +03:00
|
|
|
vm_throw_continue(const rb_execution_context_t *ec, VALUE err)
|
2009-12-03 21:25:57 +03:00
|
|
|
{
|
2015-01-16 05:54:22 +03:00
|
|
|
/* continue throw */
|
2009-12-03 21:25:57 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
if (FIXNUM_P(err)) {
|
2023-05-18 17:33:42 +03:00
|
|
|
ec->tag->state = RUBY_TAG_FATAL;
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
else if (SYMBOL_P(err)) {
|
2017-10-27 04:35:12 +03:00
|
|
|
ec->tag->state = TAG_THROW;
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
2015-03-10 21:39:46 +03:00
|
|
|
else if (THROW_DATA_P(err)) {
|
2017-10-27 04:35:12 +03:00
|
|
|
ec->tag->state = THROW_DATA_STATE((struct vm_throw_data *)err);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
else {
|
2017-10-27 04:35:12 +03:00
|
|
|
ec->tag->state = TAG_RAISE;
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
2009-12-03 21:25:57 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
static VALUE
|
2017-10-27 04:35:12 +03:00
|
|
|
vm_throw_start(const rb_execution_context_t *ec, rb_control_frame_t *const reg_cfp, enum ruby_tag_type state,
|
2018-10-17 10:16:58 +03:00
|
|
|
const int flag, const VALUE throwobj)
|
2015-01-16 05:54:22 +03:00
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
const rb_control_frame_t *escape_cfp = NULL;
|
2017-10-27 04:35:12 +03:00
|
|
|
const rb_control_frame_t * const eocfp = RUBY_VM_END_CONTROL_FRAME(ec); /* end of control frame pointer */
|
2015-01-16 05:54:22 +03:00
|
|
|
|
|
|
|
if (flag != 0) {
|
|
|
|
/* do nothing */
|
|
|
|
}
|
|
|
|
else if (state == TAG_BREAK) {
|
|
|
|
int is_orphan = 1;
|
2016-07-28 14:02:30 +03:00
|
|
|
const VALUE *ep = GET_EP();
|
2015-07-16 16:13:50 +03:00
|
|
|
const rb_iseq_t *base_iseq = GET_ISEQ();
|
2015-01-16 05:54:22 +03:00
|
|
|
escape_cfp = reg_cfp;
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
while (ISEQ_BODY(base_iseq)->type != ISEQ_TYPE_BLOCK) {
|
|
|
|
if (ISEQ_BODY(escape_cfp->iseq)->type == ISEQ_TYPE_CLASS) {
|
2015-01-16 05:54:22 +03:00
|
|
|
escape_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(escape_cfp);
|
|
|
|
ep = escape_cfp->ep;
|
|
|
|
base_iseq = escape_cfp->iseq;
|
|
|
|
}
|
|
|
|
else {
|
2016-07-28 14:02:30 +03:00
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
2022-03-23 22:19:48 +03:00
|
|
|
base_iseq = ISEQ_BODY(base_iseq)->parent_iseq;
|
2017-10-27 04:35:12 +03:00
|
|
|
escape_cfp = rb_vm_search_cf_from_ep(ec, escape_cfp, ep);
|
2015-06-11 02:42:01 +03:00
|
|
|
VM_ASSERT(escape_cfp->iseq == base_iseq);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2017-06-03 13:07:44 +03:00
|
|
|
if (VM_FRAME_LAMBDA_P(escape_cfp)) {
|
2015-01-16 05:54:22 +03:00
|
|
|
/* lambda{... break ...} */
|
|
|
|
is_orphan = 0;
|
|
|
|
state = TAG_RETURN;
|
|
|
|
}
|
|
|
|
else {
|
2016-07-28 14:02:30 +03:00
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
while (escape_cfp < eocfp) {
|
|
|
|
if (escape_cfp->ep == ep) {
|
2017-06-27 06:45:55 +03:00
|
|
|
const rb_iseq_t *const iseq = escape_cfp->iseq;
|
2022-03-23 22:19:48 +03:00
|
|
|
const VALUE epc = escape_cfp->pc - ISEQ_BODY(iseq)->iseq_encoded;
|
|
|
|
const struct iseq_catch_table *const ct = ISEQ_BODY(iseq)->catch_table;
|
2017-06-27 06:45:55 +03:00
|
|
|
unsigned int i;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2017-06-27 06:45:55 +03:00
|
|
|
if (!ct) break;
|
|
|
|
for (i=0; i < ct->size; i++) {
|
2019-05-31 09:58:50 +03:00
|
|
|
const struct iseq_catch_table_entry *const entry =
|
|
|
|
UNALIGNED_MEMBER_PTR(ct, entries[i]);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2017-06-08 07:13:51 +03:00
|
|
|
if (entry->type == CATCH_TYPE_BREAK &&
|
|
|
|
entry->iseq == base_iseq &&
|
|
|
|
entry->start < epc && entry->end >= epc) {
|
2015-01-16 05:54:22 +03:00
|
|
|
if (entry->cont == epc) { /* found! */
|
|
|
|
is_orphan = 0;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
break;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
escape_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(escape_cfp);
|
2011-03-31 13:07:42 +04:00
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
if (is_orphan) {
|
|
|
|
rb_vm_localjump_error("break from proc-closure", throwobj, TAG_BREAK);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (state == TAG_RETRY) {
|
2016-07-28 14:02:30 +03:00
|
|
|
const VALUE *ep = VM_ENV_PREV_EP(GET_EP());
|
2015-01-16 05:54:22 +03:00
|
|
|
|
2017-10-27 04:35:12 +03:00
|
|
|
escape_cfp = rb_vm_search_cf_from_ep(ec, reg_cfp, ep);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
else if (state == TAG_RETURN) {
|
2016-07-28 14:02:30 +03:00
|
|
|
const VALUE *current_ep = GET_EP();
|
2021-04-01 20:28:00 +03:00
|
|
|
const VALUE *target_ep = NULL, *target_lep, *ep = current_ep;
|
2015-01-16 05:54:22 +03:00
|
|
|
int in_class_frame = 0;
|
2017-06-27 09:57:34 +03:00
|
|
|
int toplevel = 1;
|
2015-01-16 05:54:22 +03:00
|
|
|
escape_cfp = reg_cfp;
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2021-04-01 20:28:00 +03:00
|
|
|
// find target_lep, target_ep
|
|
|
|
while (!VM_ENV_LOCAL_P(ep)) {
|
|
|
|
if (VM_ENV_FLAGS(ep, VM_FRAME_FLAG_LAMBDA) && target_ep == NULL) {
|
|
|
|
target_ep = ep;
|
|
|
|
}
|
|
|
|
ep = VM_ENV_PREV_EP(ep);
|
|
|
|
}
|
|
|
|
target_lep = ep;
|
|
|
|
|
|
|
|
while (escape_cfp < eocfp) {
|
|
|
|
const VALUE *lep = VM_CF_LEP(escape_cfp);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
if (!target_lep) {
|
|
|
|
target_lep = lep;
|
|
|
|
}
|
|
|
|
|
2016-06-06 12:37:59 +03:00
|
|
|
if (lep == target_lep &&
|
2016-08-03 04:50:50 +03:00
|
|
|
VM_FRAME_RUBYFRAME_P(escape_cfp) &&
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(escape_cfp->iseq)->type == ISEQ_TYPE_CLASS) {
|
2015-01-16 05:54:22 +03:00
|
|
|
in_class_frame = 1;
|
|
|
|
target_lep = 0;
|
|
|
|
}
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
if (lep == target_lep) {
|
2017-06-03 13:07:44 +03:00
|
|
|
if (VM_FRAME_LAMBDA_P(escape_cfp)) {
|
2017-06-27 09:57:34 +03:00
|
|
|
toplevel = 0;
|
2015-01-16 05:54:22 +03:00
|
|
|
if (in_class_frame) {
|
|
|
|
/* lambda {class A; ... return ...; end} */
|
2021-04-01 20:28:00 +03:00
|
|
|
goto valid_return;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
else {
|
2016-07-28 14:02:30 +03:00
|
|
|
const VALUE *tep = current_ep;
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2015-01-16 05:54:22 +03:00
|
|
|
while (target_lep != tep) {
|
|
|
|
if (escape_cfp->ep == tep) {
|
|
|
|
/* in lambda */
|
2021-04-01 20:28:00 +03:00
|
|
|
if (tep == target_ep) {
|
|
|
|
goto valid_return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
goto unexpected_return;
|
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
tep = VM_ENV_PREV_EP(tep);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2017-06-27 09:57:34 +03:00
|
|
|
else if (VM_FRAME_RUBYFRAME_P(escape_cfp)) {
|
2022-03-23 22:19:48 +03:00
|
|
|
switch (ISEQ_BODY(escape_cfp->iseq)->type) {
|
2017-06-27 09:57:34 +03:00
|
|
|
case ISEQ_TYPE_TOP:
|
|
|
|
case ISEQ_TYPE_MAIN:
|
2019-10-02 17:56:28 +03:00
|
|
|
if (toplevel) {
|
|
|
|
if (in_class_frame) goto unexpected_return;
|
2021-04-01 20:28:00 +03:00
|
|
|
if (target_ep == NULL) {
|
|
|
|
goto valid_return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
goto unexpected_return;
|
|
|
|
}
|
2019-10-02 17:56:28 +03:00
|
|
|
}
|
2017-06-27 09:57:34 +03:00
|
|
|
break;
|
2023-10-25 22:49:28 +03:00
|
|
|
case ISEQ_TYPE_EVAL: {
|
|
|
|
const rb_iseq_t *is = escape_cfp->iseq;
|
|
|
|
enum rb_iseq_type t = ISEQ_BODY(is)->type;
|
|
|
|
while (t == ISEQ_TYPE_RESCUE || t == ISEQ_TYPE_ENSURE || t == ISEQ_TYPE_EVAL) {
|
|
|
|
if (!(is = ISEQ_BODY(is)->parent_iseq)) break;
|
|
|
|
t = ISEQ_BODY(is)->type;
|
|
|
|
}
|
|
|
|
toplevel = t == ISEQ_TYPE_TOP || t == ISEQ_TYPE_MAIN;
|
|
|
|
break;
|
|
|
|
}
|
2017-06-27 09:57:34 +03:00
|
|
|
case ISEQ_TYPE_CLASS:
|
|
|
|
toplevel = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
if (escape_cfp->ep == target_lep && ISEQ_BODY(escape_cfp->iseq)->type == ISEQ_TYPE_METHOD) {
|
2021-04-01 20:28:00 +03:00
|
|
|
if (target_ep == NULL) {
|
|
|
|
goto valid_return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
goto unexpected_return;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2015-01-16 05:54:22 +03:00
|
|
|
|
|
|
|
escape_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(escape_cfp);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2019-10-02 17:56:28 +03:00
|
|
|
unexpected_return:;
|
2015-01-16 05:54:22 +03:00
|
|
|
rb_vm_localjump_error("unexpected return", throwobj, TAG_RETURN);
|
|
|
|
|
|
|
|
valid_return:;
|
|
|
|
/* do nothing */
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
else {
|
2019-12-20 03:19:39 +03:00
|
|
|
rb_bug("isns(throw): unsupported throw type");
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2017-10-27 04:35:12 +03:00
|
|
|
ec->tag->state = state;
|
2015-03-11 15:49:27 +03:00
|
|
|
return (VALUE)THROW_DATA_NEW(throwobj, escape_cfp, state);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2017-10-27 04:35:12 +03:00
|
|
|
vm_throw(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
2015-01-16 05:54:22 +03:00
|
|
|
rb_num_t throw_state, VALUE throwobj)
|
|
|
|
{
|
2015-07-20 03:08:23 +03:00
|
|
|
const int state = (int)(throw_state & VM_THROW_STATE_MASK);
|
|
|
|
const int flag = (int)(throw_state & VM_THROW_NO_ESCAPE_FLAG);
|
2015-01-16 05:54:22 +03:00
|
|
|
|
|
|
|
if (state != 0) {
|
2018-10-17 10:16:58 +03:00
|
|
|
return vm_throw_start(ec, reg_cfp, state, flag, throwobj);
|
2015-01-16 05:54:22 +03:00
|
|
|
}
|
|
|
|
else {
|
2017-10-27 04:35:12 +03:00
|
|
|
return vm_throw_continue(ec, throwobj);
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-14 23:39:06 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_throw(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t throw_state, VALUE throwobj)
|
|
|
|
{
|
|
|
|
return vm_throw(ec, reg_cfp, throw_state, throwobj);
|
|
|
|
}
|
|
|
|
|
2018-07-17 19:20:15 +03:00
|
|
|
static inline void
|
2023-11-30 18:51:51 +03:00
|
|
|
vm_expandarray(struct rb_control_frame_struct *cfp, VALUE ary, rb_num_t num, int flag)
|
2007-06-24 21:19:22 +04:00
|
|
|
{
|
2012-10-14 23:58:59 +04:00
|
|
|
int is_splat = flag & 0x01;
|
2013-07-24 13:57:49 +04:00
|
|
|
const VALUE *ptr;
|
2012-10-14 23:58:59 +04:00
|
|
|
rb_num_t len;
|
2018-01-27 08:35:47 +03:00
|
|
|
const VALUE obj = ary;
|
2008-05-19 22:47:56 +04:00
|
|
|
|
2018-01-27 08:35:47 +03:00
|
|
|
if (!RB_TYPE_P(ary, T_ARRAY) && NIL_P(ary = rb_check_array_type(ary))) {
|
|
|
|
ary = obj;
|
|
|
|
ptr = &ary;
|
|
|
|
len = 1;
|
|
|
|
}
|
|
|
|
else {
|
2023-07-13 16:45:35 +03:00
|
|
|
ptr = RARRAY_CONST_PTR(ary);
|
2018-01-27 08:35:47 +03:00
|
|
|
len = (rb_num_t)RARRAY_LEN(ary);
|
2011-09-20 13:09:00 +04:00
|
|
|
}
|
|
|
|
|
2023-11-30 18:51:51 +03:00
|
|
|
if (num + is_splat == 0) {
|
2018-11-12 11:14:09 +03:00
|
|
|
/* no space left on stack */
|
|
|
|
}
|
|
|
|
else if (flag & 0x02) {
|
2012-10-14 23:58:59 +04:00
|
|
|
/* post: ..., nil ,ary[-1], ..., ary[0..-num] # top */
|
|
|
|
rb_num_t i = 0, j;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (len < num) {
|
2023-11-30 18:51:51 +03:00
|
|
|
for (i = 0; i < num - len; i++) {
|
|
|
|
*cfp->sp++ = Qnil;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
}
|
2023-11-30 18:51:51 +03:00
|
|
|
|
|
|
|
for (j = 0; i < num; i++, j++) {
|
2012-10-14 23:58:59 +04:00
|
|
|
VALUE v = ptr[len - j - 1];
|
2023-11-30 18:51:51 +03:00
|
|
|
*cfp->sp++ = v;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2023-11-30 18:51:51 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
if (is_splat) {
|
2023-11-30 18:51:51 +03:00
|
|
|
*cfp->sp++ = rb_ary_new4(len - j, ptr);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-06-24 21:19:22 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
else {
|
|
|
|
/* normal: ary[num..-1], ary[num-2], ary[num-3], ..., ary[0] # top */
|
|
|
|
if (is_splat) {
|
|
|
|
if (num > len) {
|
2023-11-30 18:51:51 +03:00
|
|
|
*cfp->sp++ = rb_ary_new();
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
else {
|
2023-11-30 18:51:51 +03:00
|
|
|
*cfp->sp++ = rb_ary_new4(len - num, ptr + num);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num > len) {
|
|
|
|
rb_num_t i = 0;
|
|
|
|
for (; i < num - len; i++) {
|
|
|
|
*cfp->sp++ = Qnil;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (rb_num_t j = 0; i < num; i++, j++) {
|
|
|
|
*cfp->sp++ = ptr[len - j - 1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
for (rb_num_t j = 0; j < num; j++) {
|
|
|
|
*cfp->sp++ = ptr[num - j - 1];
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2011-09-02 09:36:49 +04:00
|
|
|
}
|
2011-09-01 12:31:24 +04:00
|
|
|
}
|
2023-11-30 18:51:51 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
RB_GC_GUARD(ary);
|
2011-09-01 12:31:24 +04:00
|
|
|
}
|
2009-09-06 11:40:24 +04:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
static VALUE vm_call_general(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling);
|
2009-09-06 11:40:24 +04:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static VALUE vm_mtbl_dump(VALUE klass, ID target_mid);
|
|
|
|
|
|
|
|
static struct rb_class_cc_entries *
|
2023-01-12 21:30:06 +03:00
|
|
|
vm_ccs_create(VALUE klass, struct rb_id_table *cc_tbl, ID mid, const rb_callable_method_entry_t *cme)
|
2019-09-18 11:18:48 +03:00
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
struct rb_class_cc_entries *ccs = ALLOC(struct rb_class_cc_entries);
|
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
ccs->debug_sig = ~(VALUE)ccs;
|
|
|
|
#endif
|
2020-12-12 19:32:17 +03:00
|
|
|
ccs->capa = 0;
|
2020-01-08 10:14:01 +03:00
|
|
|
ccs->len = 0;
|
2023-01-12 21:30:06 +03:00
|
|
|
ccs->cme = cme;
|
2020-01-08 10:14:01 +03:00
|
|
|
METHOD_ENTRY_CACHED_SET((rb_callable_method_entry_t *)cme);
|
2020-12-12 19:32:17 +03:00
|
|
|
ccs->entries = NULL;
|
2023-01-12 21:30:06 +03:00
|
|
|
|
|
|
|
rb_id_table_insert(cc_tbl, mid, (VALUE)ccs);
|
|
|
|
RB_OBJ_WRITTEN(klass, Qundef, cme);
|
2020-01-08 10:14:01 +03:00
|
|
|
return ccs;
|
|
|
|
}
|
2019-10-24 12:08:52 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static void
|
|
|
|
vm_ccs_push(VALUE klass, struct rb_class_cc_entries *ccs, const struct rb_callinfo *ci, const struct rb_callcache *cc)
|
|
|
|
{
|
2020-06-05 09:55:05 +03:00
|
|
|
if (! vm_cc_markable(cc)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
if (UNLIKELY(ccs->len == ccs->capa)) {
|
2020-12-12 19:32:17 +03:00
|
|
|
if (ccs->capa == 0) {
|
|
|
|
ccs->capa = 1;
|
|
|
|
ccs->entries = ALLOC_N(struct rb_class_cc_entries_entry, ccs->capa);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ccs->capa *= 2;
|
|
|
|
REALLOC_N(ccs->entries, struct rb_class_cc_entries_entry, ccs->capa);
|
|
|
|
}
|
2019-09-19 04:37:30 +03:00
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
VM_ASSERT(ccs->len < ccs->capa);
|
|
|
|
|
|
|
|
const int pos = ccs->len++;
|
2024-04-18 02:30:41 +03:00
|
|
|
ccs->entries[pos].argc = vm_ci_argc(ci);
|
|
|
|
ccs->entries[pos].flag = vm_ci_flag(ci);
|
2020-01-08 10:14:01 +03:00
|
|
|
RB_OBJ_WRITE(klass, &ccs->entries[pos].cc, cc);
|
|
|
|
|
|
|
|
if (RB_DEBUG_COUNTER_SETMAX(ccs_maxlen, ccs->len)) {
|
|
|
|
// for tuning
|
|
|
|
// vm_mtbl_dump(klass, 0);
|
2019-09-18 11:18:48 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
void
|
|
|
|
rb_vm_ccs_dump(struct rb_class_cc_entries *ccs)
|
mjit_compile.c: merge initial JIT compiler
which has been developed by Takashi Kokubun <takashikkbn@gmail> as
YARV-MJIT. Many of its bugs are fixed by wanabe <s.wanabe@gmail.com>.
This JIT compiler is designed to be a safe migration path to introduce
JIT compiler to MRI. So this commit does not include any bytecode
changes or dynamic instruction modifications, which are done in original
MJIT.
This commit even strips off some aggressive optimizations from
YARV-MJIT, and thus it's slower than YARV-MJIT too. But it's still
fairly faster than Ruby 2.5 in some benchmarks (attached below).
Note that this JIT compiler passes `make test`, `make test-all`, `make
test-spec` without JIT, and even with JIT. Not only it's perfectly safe
with JIT disabled because it does not replace VM instructions unlike
MJIT, but also with JIT enabled it stably runs Ruby applications
including Rails applications.
I'm expecting this version as just "initial" JIT compiler. I have many
optimization ideas which are skipped for initial merging, and you may
easily replace this JIT compiler with a faster one by just replacing
mjit_compile.c. `mjit_compile` interface is designed for the purpose.
common.mk: update dependencies for mjit_compile.c.
internal.h: declare `rb_vm_insn_addr2insn` for MJIT.
vm.c: exclude some definitions if `-DMJIT_HEADER` is provided to
compiler. This avoids to include some functions which take a long time
to compile, e.g. vm_exec_core. Some of the purpose is achieved in
transform_mjit_header.rb (see `IGNORED_FUNCTIONS`) but others are
manually resolved for now. Load mjit_helper.h for MJIT header.
mjit_helper.h: New. This is a file used only by JIT-ed code. I'll
refactor `mjit_call_cfunc` later.
vm_eval.c: add some #ifdef switches to skip compiling some functions
like Init_vm_eval.
win32/mkexports.rb: export thread/ec functions, which are used by MJIT.
include/ruby/defines.h: add MJIT_FUNC_EXPORTED macro alis to clarify
that a function is exported only for MJIT.
array.c: export a function used by MJIT.
bignum.c: ditto.
class.c: ditto.
compile.c: ditto.
error.c: ditto.
gc.c: ditto.
hash.c: ditto.
iseq.c: ditto.
numeric.c: ditto.
object.c: ditto.
proc.c: ditto.
re.c: ditto.
st.c: ditto.
string.c: ditto.
thread.c: ditto.
variable.c: ditto.
vm_backtrace.c: ditto.
vm_insnhelper.c: ditto.
vm_method.c: ditto.
I would like to improve maintainability of function exports, but I
believe this way is acceptable as initial merging if we clarify the
new exports are for MJIT (so that we can use them as TODO list to fix)
and add unit tests to detect unresolved symbols.
I'll add unit tests of JIT compilations in succeeding commits.
Author: Takashi Kokubun <takashikkbn@gmail.com>
Contributor: wanabe <s.wanabe@gmail.com>
Part of [Feature #14235]
---
* Known issues
* Code generated by gcc is faster than clang. The benchmark may be worse
in macOS. Following benchmark result is provided by gcc w/ Linux.
* Performance is decreased when Google Chrome is running
* JIT can work on MinGW, but it doesn't improve performance at least
in short running benchmark.
* Currently it doesn't perform well with Rails. We'll try to fix this
before release.
---
* Benchmark reslts
Benchmarked with:
Intel 4.0GHz i7-4790K with 16GB memory under x86-64 Ubuntu 8 Cores
- 2.0.0-p0: Ruby 2.0.0-p0
- r62186: Ruby trunk (early 2.6.0), before MJIT changes
- JIT off: On this commit, but without `--jit` option
- JIT on: On this commit, and with `--jit` option
** Optcarrot fps
Benchmark: https://github.com/mame/optcarrot
| |2.0.0-p0 |r62186 |JIT off |JIT on |
|:--------|:--------|:--------|:--------|:--------|
|fps |37.32 |51.46 |51.31 |58.88 |
|vs 2.0.0 |1.00x |1.38x |1.37x |1.58x |
** MJIT benchmarks
Benchmark: https://github.com/benchmark-driver/mjit-benchmarks
(Original: https://github.com/vnmakarov/ruby/tree/rtl_mjit_branch/MJIT-benchmarks)
| |2.0.0-p0 |r62186 |JIT off |JIT on |
|:----------|:--------|:--------|:--------|:--------|
|aread |1.00 |1.09 |1.07 |2.19 |
|aref |1.00 |1.13 |1.11 |2.22 |
|aset |1.00 |1.50 |1.45 |2.64 |
|awrite |1.00 |1.17 |1.13 |2.20 |
|call |1.00 |1.29 |1.26 |2.02 |
|const2 |1.00 |1.10 |1.10 |2.19 |
|const |1.00 |1.11 |1.10 |2.19 |
|fannk |1.00 |1.04 |1.02 |1.00 |
|fib |1.00 |1.32 |1.31 |1.84 |
|ivread |1.00 |1.13 |1.12 |2.43 |
|ivwrite |1.00 |1.23 |1.21 |2.40 |
|mandelbrot |1.00 |1.13 |1.16 |1.28 |
|meteor |1.00 |2.97 |2.92 |3.17 |
|nbody |1.00 |1.17 |1.15 |1.49 |
|nest-ntimes|1.00 |1.22 |1.20 |1.39 |
|nest-while |1.00 |1.10 |1.10 |1.37 |
|norm |1.00 |1.18 |1.16 |1.24 |
|nsvb |1.00 |1.16 |1.16 |1.17 |
|red-black |1.00 |1.02 |0.99 |1.12 |
|sieve |1.00 |1.30 |1.28 |1.62 |
|trees |1.00 |1.14 |1.13 |1.19 |
|while |1.00 |1.12 |1.11 |2.41 |
** Discourse's script/bench.rb
Benchmark: https://github.com/discourse/discourse/blob/v1.8.7/script/bench.rb
NOTE: Rails performance was somehow a little degraded with JIT for now.
We should fix this.
(At least I know opt_aref is performing badly in JIT and I have an idea
to fix it. Please wait for the fix.)
*** JIT off
Your Results: (note for timings- percentile is first, duration is second in millisecs)
categories_admin:
50: 17
75: 18
90: 22
99: 29
home_admin:
50: 21
75: 21
90: 27
99: 40
topic_admin:
50: 17
75: 18
90: 22
99: 32
categories:
50: 35
75: 41
90: 43
99: 77
home:
50: 39
75: 46
90: 49
99: 95
topic:
50: 46
75: 52
90: 56
99: 101
*** JIT on
Your Results: (note for timings- percentile is first, duration is second in millisecs)
categories_admin:
50: 19
75: 21
90: 25
99: 33
home_admin:
50: 24
75: 26
90: 30
99: 35
topic_admin:
50: 19
75: 20
90: 25
99: 30
categories:
50: 40
75: 44
90: 48
99: 76
home:
50: 42
75: 48
90: 51
99: 89
topic:
50: 49
75: 55
90: 58
99: 99
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62197 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-02-04 14:22:28 +03:00
|
|
|
{
|
2021-09-09 17:21:06 +03:00
|
|
|
ruby_debug_printf("ccs:%p (%d,%d)\n", (void *)ccs, ccs->len, ccs->capa);
|
2020-01-08 10:14:01 +03:00
|
|
|
for (int i=0; i<ccs->len; i++) {
|
2024-04-18 02:30:41 +03:00
|
|
|
ruby_debug_printf("CCS CI ID:flag:%x argc:%u\n",
|
|
|
|
ccs->entries[i].flag,
|
|
|
|
ccs->entries[i].argc);
|
2020-01-08 10:14:01 +03:00
|
|
|
rp(ccs->entries[i].cc);
|
2019-10-07 06:59:57 +03:00
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vm_ccs_verify(struct rb_class_cc_entries *ccs, ID mid, VALUE klass)
|
2019-10-07 06:59:57 +03:00
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
VM_ASSERT(vm_ccs_p(ccs));
|
|
|
|
VM_ASSERT(ccs->len <= ccs->capa);
|
2019-10-07 06:59:57 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
for (int i=0; i<ccs->len; i++) {
|
|
|
|
const struct rb_callcache *cc = ccs->entries[i].cc;
|
2019-10-07 06:59:57 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache));
|
|
|
|
VM_ASSERT(vm_cc_class_check(cc, klass));
|
`Primitive.mandatory_only?` for fast path
Compare with the C methods, A built-in methods written in Ruby is
slower if only mandatory parameters are given because it needs to
check the argumens and fill default values for optional and keyword
parameters (C methods can check the number of parameters with `argc`,
so there are no overhead). Passing mandatory arguments are common
(optional arguments are exceptional, in many cases) so it is important
to provide the fast path for such common cases.
`Primitive.mandatory_only?` is a special builtin function used with
`if` expression like that:
```ruby
def self.at(time, subsec = false, unit = :microsecond, in: nil)
if Primitive.mandatory_only?
Primitive.time_s_at1(time)
else
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
end
```
and it makes two ISeq,
```
def self.at(time, subsec = false, unit = :microsecond, in: nil)
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
def self.at(time)
Primitive.time_s_at1(time)
end
```
and (2) is pointed by (1). Note that `Primitive.mandatory_only?`
should be used only in a condition of an `if` statement and the
`if` statement should be equal to the methdo body (you can not
put any expression before and after the `if` statement).
A method entry with `mandatory_only?` (`Time.at` on the above case)
is marked as `iseq_overload`. When the method will be dispatch only
with mandatory arguments (`Time.at(0)` for example), make another
method entry with ISeq (2) as mandatory only method entry and it
will be cached in an inline method cache.
The idea is similar discussed in https://bugs.ruby-lang.org/issues/16254
but it only checks mandatory parameters or more, because many cases
only mandatory parameters are given. If we find other cases (optional
or keyword parameters are used frequently and it hurts performance),
we can extend the feature.
2021-11-12 20:12:20 +03:00
|
|
|
VM_ASSERT(vm_cc_check_cme(cc, ccs->cme));
|
2023-07-31 10:17:55 +03:00
|
|
|
VM_ASSERT(!vm_cc_super_p(cc));
|
|
|
|
VM_ASSERT(!vm_cc_refinement_p(cc));
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2024-02-12 20:56:44 +03:00
|
|
|
const rb_callable_method_entry_t *rb_check_overloaded_cme(const rb_callable_method_entry_t *cme, const struct rb_callinfo * const ci);
|
`Primitive.mandatory_only?` for fast path
Compare with the C methods, A built-in methods written in Ruby is
slower if only mandatory parameters are given because it needs to
check the argumens and fill default values for optional and keyword
parameters (C methods can check the number of parameters with `argc`,
so there are no overhead). Passing mandatory arguments are common
(optional arguments are exceptional, in many cases) so it is important
to provide the fast path for such common cases.
`Primitive.mandatory_only?` is a special builtin function used with
`if` expression like that:
```ruby
def self.at(time, subsec = false, unit = :microsecond, in: nil)
if Primitive.mandatory_only?
Primitive.time_s_at1(time)
else
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
end
```
and it makes two ISeq,
```
def self.at(time, subsec = false, unit = :microsecond, in: nil)
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
def self.at(time)
Primitive.time_s_at1(time)
end
```
and (2) is pointed by (1). Note that `Primitive.mandatory_only?`
should be used only in a condition of an `if` statement and the
`if` statement should be equal to the method body (you cannot
put any expression before and after the `if` statement).
A method entry with `mandatory_only?` (`Time.at` on the above case)
is marked as `iseq_overload`. When the method will be dispatch only
with mandatory arguments (`Time.at(0)` for example), make another
method entry with ISeq (2) as mandatory only method entry and it
will be cached in an inline method cache.
The idea is similar discussed in https://bugs.ruby-lang.org/issues/16254
but it only checks mandatory parameters or more, because many cases
only mandatory parameters are given. If we find other cases (optional
or keyword parameters are used frequently and it hurts performance),
we can extend the feature.
2021-11-12 20:12:20 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static const struct rb_callcache *
|
2020-12-15 06:10:35 +03:00
|
|
|
vm_search_cc(const VALUE klass, const struct rb_callinfo * const ci)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
2020-12-15 06:10:35 +03:00
|
|
|
const ID mid = vm_ci_mid(ci);
|
2020-01-08 10:14:01 +03:00
|
|
|
struct rb_id_table *cc_tbl = RCLASS_CC_TBL(klass);
|
|
|
|
struct rb_class_cc_entries *ccs = NULL;
|
2021-08-05 14:09:25 +03:00
|
|
|
VALUE ccs_data;
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
if (cc_tbl) {
|
2024-04-18 02:30:41 +03:00
|
|
|
// CCS data is keyed on method id, so we don't need the method id
|
|
|
|
// for doing comparisons in the `for` loop below.
|
2021-08-05 14:09:25 +03:00
|
|
|
if (rb_id_table_lookup(cc_tbl, mid, &ccs_data)) {
|
|
|
|
ccs = (struct rb_class_cc_entries *)ccs_data;
|
2020-01-08 10:14:01 +03:00
|
|
|
const int ccs_len = ccs->len;
|
|
|
|
|
|
|
|
if (UNLIKELY(METHOD_ENTRY_INVALIDATED(ccs->cme))) {
|
|
|
|
rb_vm_ccs_free(ccs);
|
|
|
|
rb_id_table_delete(cc_tbl, mid);
|
|
|
|
ccs = NULL;
|
|
|
|
}
|
|
|
|
else {
|
2021-12-21 00:03:51 +03:00
|
|
|
VM_ASSERT(vm_ccs_verify(ccs, mid, klass));
|
|
|
|
|
2024-04-18 02:30:41 +03:00
|
|
|
// We already know the method id is correct because we had
|
|
|
|
// to look up the ccs_data by method id. All we need to
|
|
|
|
// compare is argc and flag
|
|
|
|
unsigned int argc = vm_ci_argc(ci);
|
|
|
|
unsigned int flag = vm_ci_flag(ci);
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
for (int i=0; i<ccs_len; i++) {
|
2024-04-18 02:30:41 +03:00
|
|
|
unsigned int ccs_ci_argc = ccs->entries[i].argc;
|
|
|
|
unsigned int ccs_ci_flag = ccs->entries[i].flag;
|
2020-01-08 10:14:01 +03:00
|
|
|
const struct rb_callcache *ccs_cc = ccs->entries[i].cc;
|
|
|
|
|
|
|
|
VM_ASSERT(IMEMO_TYPE_P(ccs_cc, imemo_callcache));
|
|
|
|
|
2024-04-18 02:30:41 +03:00
|
|
|
if (ccs_ci_argc == argc && ccs_ci_flag == flag) {
|
2020-12-15 06:10:35 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(cc_found_in_ccs);
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
VM_ASSERT(vm_cc_cme(ccs_cc)->called_id == mid);
|
|
|
|
VM_ASSERT(ccs_cc->klass == klass);
|
|
|
|
VM_ASSERT(!METHOD_ENTRY_INVALIDATED(vm_cc_cme(ccs_cc)));
|
|
|
|
|
|
|
|
return ccs_cc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-10-07 06:59:57 +03:00
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
cc_tbl = RCLASS_CC_TBL(klass) = rb_id_table_create(2);
|
|
|
|
}
|
|
|
|
|
2020-12-15 06:10:35 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(cc_not_found_in_ccs);
|
|
|
|
|
|
|
|
const rb_callable_method_entry_t *cme;
|
|
|
|
|
|
|
|
if (ccs) {
|
|
|
|
cme = ccs->cme;
|
|
|
|
cme = UNDEFINED_METHOD_ENTRY_P(cme) ? NULL : cme;
|
|
|
|
|
|
|
|
VM_ASSERT(cme == rb_callable_method_entry(klass, mid));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cme = rb_callable_method_entry(klass, mid);
|
|
|
|
}
|
|
|
|
|
|
|
|
VM_ASSERT(cme == NULL || IMEMO_TYPE_P(cme, imemo_ment));
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
if (cme == NULL) {
|
|
|
|
// undef or not found: can't cache the information
|
2020-05-29 10:04:53 +03:00
|
|
|
VM_ASSERT(vm_cc_cme(&vm_empty_cc) == NULL);
|
|
|
|
return &vm_empty_cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
|
2021-01-07 12:06:24 +03:00
|
|
|
VM_ASSERT(cme == rb_callable_method_entry(klass, mid));
|
2020-01-08 10:14:01 +03:00
|
|
|
|
2020-12-15 06:10:35 +03:00
|
|
|
METHOD_ENTRY_CACHED_SET((struct rb_callable_method_entry_struct *)cme);
|
|
|
|
|
|
|
|
if (ccs == NULL) {
|
|
|
|
VM_ASSERT(cc_tbl != NULL);
|
|
|
|
|
2021-08-05 14:09:25 +03:00
|
|
|
if (LIKELY(rb_id_table_lookup(cc_tbl, mid, &ccs_data))) {
|
2020-12-15 06:10:35 +03:00
|
|
|
// rb_callable_method_entry() prepares ccs.
|
2021-08-05 14:09:25 +03:00
|
|
|
ccs = (struct rb_class_cc_entries *)ccs_data;
|
2020-12-15 06:10:35 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
// TODO: required?
|
2023-01-12 21:30:06 +03:00
|
|
|
ccs = vm_ccs_create(klass, cc_tbl, mid, cme);
|
2020-12-15 06:10:35 +03:00
|
|
|
}
|
2019-10-07 06:59:57 +03:00
|
|
|
}
|
2020-12-15 06:10:35 +03:00
|
|
|
|
2024-02-12 20:56:44 +03:00
|
|
|
cme = rb_check_overloaded_cme(cme, ci);
|
2021-12-21 00:03:51 +03:00
|
|
|
|
2023-07-31 06:26:27 +03:00
|
|
|
const struct rb_callcache *cc = vm_cc_new(klass, cme, vm_call_general, cc_type_normal);
|
2020-12-15 06:10:35 +03:00
|
|
|
vm_ccs_push(klass, ccs, ci, cc);
|
|
|
|
|
|
|
|
VM_ASSERT(vm_cc_cme(cc) != NULL);
|
|
|
|
VM_ASSERT(cme->called_id == mid);
|
|
|
|
VM_ASSERT(vm_cc_cme(cc)->called_id == mid);
|
|
|
|
|
|
|
|
return cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
2019-10-07 06:59:57 +03:00
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
const struct rb_callcache *
|
2020-12-14 23:40:38 +03:00
|
|
|
rb_vm_search_method_slowpath(const struct rb_callinfo *ci, VALUE klass)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc;
|
|
|
|
|
2024-10-31 16:12:16 +03:00
|
|
|
VM_ASSERT_TYPE2(klass, T_CLASS, T_ICLASS);
|
2020-12-14 23:40:38 +03:00
|
|
|
|
2020-03-09 20:22:11 +03:00
|
|
|
RB_VM_LOCK_ENTER();
|
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
cc = vm_search_cc(klass, ci);
|
2020-12-14 11:56:34 +03:00
|
|
|
|
2020-03-09 20:22:11 +03:00
|
|
|
VM_ASSERT(cc);
|
|
|
|
VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache));
|
|
|
|
VM_ASSERT(cc == vm_cc_empty() || cc->klass == klass);
|
|
|
|
VM_ASSERT(cc == vm_cc_empty() || callable_method_entry_p(vm_cc_cme(cc)));
|
|
|
|
VM_ASSERT(cc == vm_cc_empty() || !METHOD_ENTRY_INVALIDATED(vm_cc_cme(cc)));
|
2020-12-14 23:40:38 +03:00
|
|
|
VM_ASSERT(cc == vm_cc_empty() || vm_cc_cme(cc)->called_id == vm_ci_mid(ci));
|
2020-03-09 20:22:11 +03:00
|
|
|
}
|
|
|
|
RB_VM_LOCK_LEAVE();
|
2020-12-14 23:40:38 +03:00
|
|
|
|
|
|
|
return cc;
|
2019-10-07 06:59:57 +03:00
|
|
|
}
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
static const struct rb_callcache *
|
2020-12-16 04:36:23 +03:00
|
|
|
vm_search_method_slowpath0(VALUE cd_owner, struct rb_call_data *cd, VALUE klass)
|
2009-07-13 08:44:20 +04:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
#if USE_DEBUG_COUNTER
|
|
|
|
const struct rb_callcache *old_cc = cd->cc;
|
|
|
|
#endif
|
|
|
|
|
2020-12-15 06:10:35 +03:00
|
|
|
const struct rb_callcache *cc = rb_vm_search_method_slowpath(cd->ci, klass);
|
2020-12-14 23:40:38 +03:00
|
|
|
|
|
|
|
#if OPT_INLINE_METHOD_CACHE
|
|
|
|
cd->cc = cc;
|
|
|
|
|
2023-03-07 09:03:39 +03:00
|
|
|
const struct rb_callcache *empty_cc = &vm_empty_cc;
|
2023-07-31 10:04:16 +03:00
|
|
|
if (cd_owner && cc != empty_cc) {
|
|
|
|
RB_OBJ_WRITTEN(cd_owner, Qundef, cc);
|
|
|
|
}
|
2020-12-14 23:40:38 +03:00
|
|
|
|
|
|
|
#if USE_DEBUG_COUNTER
|
2024-06-03 10:54:05 +03:00
|
|
|
if (!old_cc || old_cc == empty_cc) {
|
2020-12-14 23:40:38 +03:00
|
|
|
// empty
|
|
|
|
RB_DEBUG_COUNTER_INC(mc_inline_miss_empty);
|
|
|
|
}
|
|
|
|
else if (old_cc == cc) {
|
|
|
|
RB_DEBUG_COUNTER_INC(mc_inline_miss_same_cc);
|
|
|
|
}
|
|
|
|
else if (vm_cc_cme(old_cc) == vm_cc_cme(cc)) {
|
|
|
|
RB_DEBUG_COUNTER_INC(mc_inline_miss_same_cme);
|
|
|
|
}
|
|
|
|
else if (vm_cc_cme(old_cc) && vm_cc_cme(cc) &&
|
|
|
|
vm_cc_cme(old_cc)->def == vm_cc_cme(cc)->def) {
|
|
|
|
RB_DEBUG_COUNTER_INC(mc_inline_miss_same_def);
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(mc_inline_miss_diff);
|
2009-07-13 08:44:20 +04:00
|
|
|
}
|
2013-08-27 11:11:49 +04:00
|
|
|
#endif
|
2020-12-14 23:40:38 +03:00
|
|
|
#endif // OPT_INLINE_METHOD_CACHE
|
2020-01-08 10:14:01 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
VM_ASSERT(vm_cc_cme(cc) == NULL ||
|
|
|
|
vm_cc_cme(cc)->called_id == vm_ci_mid(cd->ci));
|
2020-12-15 06:10:35 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return cc;
|
2009-07-13 08:44:20 +04:00
|
|
|
}
|
|
|
|
|
2020-12-19 12:04:47 +03:00
|
|
|
ALWAYS_INLINE(static const struct rb_callcache *vm_search_method_fastpath(VALUE cd_owner, struct rb_call_data *cd, VALUE klass));

/*
 * Inline-cache lookup for the call site `cd`.
 *
 * With OPT_INLINE_METHOD_CACHE enabled, cd->cc is returned as-is when
 * vm_cc_class_check() accepts it for `klass` and its method entry is not
 * invalidated.  In every other case (and always when the option is off) the
 * lookup is delegated to vm_search_method_slowpath0().
 */
static const struct rb_callcache *
vm_search_method_fastpath(VALUE cd_owner, struct rb_call_data *cd, VALUE klass)
{
    const struct rb_callcache *cached = cd->cc;

#if OPT_INLINE_METHOD_CACHE
    if (UNLIKELY(!vm_cc_class_check(cached, klass))) {
        RB_DEBUG_COUNTER_INC(mc_inline_miss_klass);
    }
    else if (UNLIKELY(METHOD_ENTRY_INVALIDATED(vm_cc_cme(cached)))) {
        RB_DEBUG_COUNTER_INC(mc_inline_miss_invalidated);
    }
    else {
        VM_ASSERT(callable_method_entry_p(vm_cc_cme(cached)));
        RB_DEBUG_COUNTER_INC(mc_inline_hit);
        VM_ASSERT(vm_cc_cme(cached) == NULL ||                        // not found
                  (vm_ci_flag(cd->ci) & VM_CALL_SUPER) ||             // search_super w/ define_method
                  vm_cc_cme(cached)->called_id == vm_ci_mid(cd->ci)); // cme->called_id == ci->mid

        return cached;
    }
#endif

    return vm_search_method_slowpath0(cd_owner, cd, klass);
}
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
static const struct rb_callcache *
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv)
|
2019-10-29 05:37:25 +03:00
|
|
|
{
|
|
|
|
VALUE klass = CLASS_OF(recv);
|
|
|
|
VM_ASSERT(klass != Qfalse);
|
|
|
|
VM_ASSERT(RBASIC_CLASS(klass) == 0 || rb_obj_is_kind_of(klass, rb_cClass));
|
2020-01-08 10:14:01 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_search_method_fastpath(cd_owner, cd, klass);
|
2019-10-29 05:37:25 +03:00
|
|
|
}
|
|
|
|
|
2022-09-14 08:35:47 +03:00
|
|
|
#if __has_attribute(transparent_union)
|
|
|
|
typedef union {
|
|
|
|
VALUE (*anyargs)(ANYARGS);
|
|
|
|
VALUE (*f00)(VALUE);
|
|
|
|
VALUE (*f01)(VALUE, VALUE);
|
|
|
|
VALUE (*f02)(VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f03)(VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f04)(VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f05)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f06)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f07)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f08)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f09)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f10)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f11)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f12)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f13)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f14)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*f15)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE);
|
|
|
|
VALUE (*fm1)(int, union { VALUE *x; const VALUE *y; } __attribute__((__transparent_union__)), VALUE);
|
|
|
|
} __attribute__((__transparent_union__)) cfunc_type;
|
2024-10-08 09:44:40 +03:00
|
|
|
# define make_cfunc_type(f) (cfunc_type){.anyargs = (VALUE (*)(ANYARGS))(f)}
|
2022-09-14 08:35:47 +03:00
|
|
|
#else
|
|
|
|
typedef VALUE (*cfunc_type)(ANYARGS);
|
2024-10-08 09:44:40 +03:00
|
|
|
# define make_cfunc_type(f) (cfunc_type)(f)
|
2022-09-14 08:35:47 +03:00
|
|
|
#endif
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline int
|
2022-09-14 08:35:47 +03:00
|
|
|
check_cfunc(const rb_callable_method_entry_t *me, cfunc_type func)
|
2009-09-06 11:40:24 +04:00
|
|
|
{
|
2020-06-03 08:21:54 +03:00
|
|
|
if (! me) {
|
|
|
|
return false;
|
2009-09-06 11:40:24 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
else {
|
2020-06-03 08:21:54 +03:00
|
|
|
VM_ASSERT(IMEMO_TYPE_P(me, imemo_ment));
|
|
|
|
VM_ASSERT(callable_method_entry_p(me));
|
|
|
|
VM_ASSERT(me->def);
|
|
|
|
if (me->def->type != VM_METHOD_TYPE_CFUNC) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else {
|
2022-09-14 08:35:47 +03:00
|
|
|
#if __has_attribute(transparent_union)
|
|
|
|
return me->def->body.cfunc.func == func.anyargs;
|
|
|
|
#else
|
2020-06-03 08:21:54 +03:00
|
|
|
return me->def->body.cfunc.func == func;
|
2022-09-14 08:35:47 +03:00
|
|
|
#endif
|
2020-06-03 08:21:54 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
}
|
* array.c, gc.c, hash.c, object.c, string.c, struct.c,
transcode.c, variable.c, vm.c, vm_insnhelper.c, vm_method.c:
replace calls to rb_error_frozen() with rb_check_frozen(). a
patch from Run Paint Run Run at [ruby-core:32014]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29583 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2010-10-24 12:14:05 +04:00
|
|
|
|
2017-05-26 09:28:38 +03:00
|
|
|
static inline int
|
2022-09-14 08:35:47 +03:00
|
|
|
vm_method_cfunc_is(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv, cfunc_type func)
|
2017-05-26 09:28:38 +03:00
|
|
|
{
|
2021-02-12 23:37:46 +03:00
|
|
|
VM_ASSERT(iseq != NULL);
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = vm_search_method((VALUE)iseq, cd, recv);
|
|
|
|
return check_cfunc(vm_cc_cme(cc), func);
|
2017-05-26 09:28:38 +03:00
|
|
|
}
|
|
|
|
|
2024-10-08 09:44:40 +03:00
|
|
|
/* From here on, calls to check_cfunc() go through this same-named macro,
 * which wraps `func` with make_cfunc_type() before calling the function. */
#define check_cfunc(me, func) check_cfunc(me, make_cfunc_type(func))
|
|
|
|
/* Same-named macro over vm_method_cfunc_is(): wraps `func` with
 * make_cfunc_type() before calling the function. */
#define vm_method_cfunc_is(iseq, cd, recv, func) vm_method_cfunc_is(iseq, cd, recv, make_cfunc_type(func))
|
|
|
|
|
2017-05-25 08:29:35 +03:00
|
|
|
/* EQ_UNREDEFINED_P(t): BASIC_OP_UNREDEFINED_P() for BOP_EQ, using the flag
 * named <t>_REDEFINED_OP_FLAG (e.g. t = INTEGER, FLOAT, SYMBOL, STRING). */
#define EQ_UNREDEFINED_P(t) BASIC_OP_UNREDEFINED_P(BOP_EQ, t##_REDEFINED_OP_FLAG)
|
|
|
|
|
2019-01-14 07:49:28 +03:00
|
|
|
static inline bool
|
2018-12-28 04:06:04 +03:00
|
|
|
FIXNUM_2_P(VALUE a, VALUE b)
|
|
|
|
{
|
|
|
|
/* FIXNUM_P(a) && FIXNUM_P(b)
|
|
|
|
* == ((a & 1) && (b & 1))
|
|
|
|
* == a & b & 1 */
|
|
|
|
SIGNED_VALUE x = a;
|
|
|
|
SIGNED_VALUE y = b;
|
|
|
|
SIGNED_VALUE z = x & y & 1;
|
|
|
|
return z == 1;
|
|
|
|
}
|
|
|
|
|
2019-01-14 07:49:28 +03:00
|
|
|
static inline bool
|
2018-12-28 04:06:04 +03:00
|
|
|
FLONUM_2_P(VALUE a, VALUE b)
|
|
|
|
{
|
2018-12-28 06:42:11 +03:00
|
|
|
#if USE_FLONUM
|
2018-12-28 04:06:04 +03:00
|
|
|
/* FLONUM_P(a) && FLONUM_P(b)
|
|
|
|
* == ((a & 3) == 2) && ((b & 3) == 2)
|
|
|
|
* == ! ((a ^ 2) | (b ^ 2) & 3)
|
|
|
|
*/
|
|
|
|
SIGNED_VALUE x = a;
|
|
|
|
SIGNED_VALUE y = b;
|
|
|
|
SIGNED_VALUE z = ((x ^ 2) | (y ^ 2)) & 3;
|
|
|
|
return !z;
|
|
|
|
#else
|
|
|
|
return false;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2020-05-29 11:42:23 +03:00
|
|
|
static VALUE
|
2021-02-12 23:37:46 +03:00
|
|
|
opt_equality_specialized(VALUE recv, VALUE obj)
|
2017-05-26 09:28:38 +03:00
|
|
|
{
|
2020-05-29 11:42:23 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) && EQ_UNREDEFINED_P(INTEGER)) {
|
|
|
|
goto compare_by_identity;
|
2017-05-26 09:28:38 +03:00
|
|
|
}
|
2020-05-29 11:42:23 +03:00
|
|
|
else if (FLONUM_2_P(recv, obj) && EQ_UNREDEFINED_P(FLOAT)) {
|
|
|
|
goto compare_by_identity;
|
2017-05-26 09:28:38 +03:00
|
|
|
}
|
2020-05-29 11:42:23 +03:00
|
|
|
else if (STATIC_SYM_P(recv) && STATIC_SYM_P(obj) && EQ_UNREDEFINED_P(SYMBOL)) {
|
|
|
|
goto compare_by_identity;
|
2017-05-26 09:28:38 +03:00
|
|
|
}
|
2020-05-29 11:42:23 +03:00
|
|
|
else if (SPECIAL_CONST_P(recv)) {
|
2021-02-12 23:37:46 +03:00
|
|
|
//
|
2020-05-29 11:42:23 +03:00
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat && RB_FLOAT_TYPE_P(obj) && EQ_UNREDEFINED_P(FLOAT)) {
|
|
|
|
double a = RFLOAT_VALUE(recv);
|
|
|
|
double b = RFLOAT_VALUE(obj);
|
2017-05-26 09:28:38 +03:00
|
|
|
|
2020-05-29 11:42:23 +03:00
|
|
|
#if MSC_VERSION_BEFORE(1300)
|
|
|
|
if (isnan(a)) {
|
|
|
|
return Qfalse;
|
|
|
|
}
|
|
|
|
else if (isnan(b)) {
|
|
|
|
return Qfalse;
|
|
|
|
}
|
|
|
|
else
|
2012-10-14 23:58:59 +04:00
|
|
|
#endif
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(a == b);
|
2017-03-06 09:44:11 +03:00
|
|
|
}
|
2020-05-29 11:42:23 +03:00
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString && EQ_UNREDEFINED_P(STRING)) {
|
|
|
|
if (recv == obj) {
|
|
|
|
return Qtrue;
|
|
|
|
}
|
|
|
|
else if (RB_TYPE_P(obj, T_STRING)) {
|
|
|
|
return rb_str_eql_internal(obj, recv);
|
2019-08-04 14:11:00 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2021-02-12 23:37:46 +03:00
|
|
|
return Qundef;
|
2020-05-29 11:42:23 +03:00
|
|
|
|
|
|
|
compare_by_identity:
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(recv == obj);
|
2009-09-06 11:40:24 +04:00
|
|
|
}
|
2020-05-29 11:42:23 +03:00
|
|
|
|
2021-02-12 23:37:46 +03:00
|
|
|
static VALUE
|
|
|
|
opt_equality(const rb_iseq_t *cd_owner, VALUE recv, VALUE obj, CALL_DATA cd)
|
|
|
|
{
|
|
|
|
VM_ASSERT(cd_owner != NULL);
|
|
|
|
|
|
|
|
VALUE val = opt_equality_specialized(recv, obj);
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(val)) return val;
|
2021-02-12 23:37:46 +03:00
|
|
|
|
|
|
|
if (!vm_method_cfunc_is(cd_owner, cd, recv, rb_obj_equal)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(recv == obj);
|
2021-02-12 23:37:46 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-25 10:36:47 +03:00
|
|
|
#undef EQ_UNREDEFINED_P
|
2009-09-06 11:40:24 +04:00
|
|
|
|
2024-04-24 20:20:07 +03:00
|
|
|
static inline const struct rb_callcache *gccct_method_search(rb_execution_context_t *ec, VALUE recv, ID mid, const struct rb_callinfo *ci); // vm_eval.c
|
2021-02-12 23:37:46 +03:00
|
|
|
NOINLINE(static VALUE opt_equality_by_mid_slowpath(VALUE recv, VALUE obj, ID mid));

/*
 * Looks up method `mid` on `recv` with gccct_method_search() and, when it
 * resolves to the C function rb_obj_equal, returns the identity comparison of
 * `recv` and `obj`.  Returns Qundef when the lookup yields no call cache or a
 * different method.
 */
static VALUE
opt_equality_by_mid_slowpath(VALUE recv, VALUE obj, ID mid)
{
    const struct rb_callcache *cc = gccct_method_search(GET_EC(), recv, mid, &VM_CI_ON_STACK(mid, 0, 1, NULL));

    if (!cc || !check_cfunc(vm_cc_cme(cc), rb_obj_equal)) {
        return Qundef;
    }
    return RBOOL(recv == obj);
}
|
2015-09-19 20:59:58 +03:00
|
|
|
|
2021-02-12 23:37:46 +03:00
|
|
|
static VALUE
|
|
|
|
opt_equality_by_mid(VALUE recv, VALUE obj, ID mid)
|
|
|
|
{
|
|
|
|
VALUE val = opt_equality_specialized(recv, obj);
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(val)) {
|
2021-02-12 23:37:46 +03:00
|
|
|
return val;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return opt_equality_by_mid_slowpath(recv, obj, mid);
|
|
|
|
}
|
2013-08-27 11:46:08 +04:00
|
|
|
}
|
|
|
|
|
2017-05-25 07:25:39 +03:00
|
|
|
VALUE
|
2021-02-12 23:37:46 +03:00
|
|
|
rb_equal_opt(VALUE obj1, VALUE obj2)
|
2017-05-25 07:25:39 +03:00
|
|
|
{
|
2021-02-12 23:37:46 +03:00
|
|
|
return opt_equality_by_mid(obj1, obj2, idEq);
|
|
|
|
}
|
2020-06-01 07:07:49 +03:00
|
|
|
|
2021-02-12 23:37:46 +03:00
|
|
|
VALUE
|
|
|
|
rb_eql_opt(VALUE obj1, VALUE obj2)
|
|
|
|
{
|
|
|
|
return opt_equality_by_mid(obj1, obj2, idEqlP);
|
2017-05-25 07:25:39 +03:00
|
|
|
}
|
2021-02-12 23:37:46 +03:00
|
|
|
|
2019-09-04 00:49:03 +03:00
|
|
|
extern VALUE rb_vm_call0(rb_execution_context_t *ec, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *, int kw_splat);
|
2021-09-30 22:35:27 +03:00
|
|
|
extern VALUE rb_vm_call_with_refinements(rb_execution_context_t *, VALUE, ID, int, const VALUE *, int);
|
2013-09-07 10:44:31 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2017-11-16 09:10:31 +03:00
|
|
|
check_match(rb_execution_context_t *ec, VALUE pattern, VALUE target, enum vm_check_match_type type)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case VM_CHECKMATCH_TYPE_WHEN:
|
|
|
|
return pattern;
|
2013-09-07 10:44:31 +04:00
|
|
|
case VM_CHECKMATCH_TYPE_RESCUE:
|
2012-10-14 23:58:59 +04:00
|
|
|
if (!rb_obj_is_kind_of(pattern, rb_cModule)) {
|
|
|
|
rb_raise(rb_eTypeError, "class or module required for rescue clause");
|
|
|
|
}
|
2013-09-07 10:44:31 +04:00
|
|
|
/* fall through */
|
|
|
|
case VM_CHECKMATCH_TYPE_CASE: {
|
2021-09-30 22:35:27 +03:00
|
|
|
return rb_vm_call_with_refinements(ec, pattern, idEqq, 1, &target, RB_NO_KEYWORDS);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
default:
|
|
|
|
rb_bug("check_match: unreachable");
|
|
|
|
}
|
|
|
|
}
|
2007-08-06 15:36:30 +04:00
|
|
|
|
|
|
|
|
2019-12-02 08:58:43 +03:00
|
|
|
/*
 * CHECK_CMP_NAN(a, b): statement macro used at the top of the double_cmp_*
 * helpers.  When MSC_VERSION_BEFORE(1300) holds it makes the enclosing
 * function return Qfalse if either operand is NaN; otherwise it does nothing.
 *
 * Both variants expand to a single statement that needs the caller's
 * trailing semicolon, so the macro is safe inside an unbraced if/else and
 * leaves no stray empty statement behind.
 */
#if MSC_VERSION_BEFORE(1300)
#define CHECK_CMP_NAN(a, b) do { if (isnan(a) || isnan(b)) return Qfalse; } while (0)
#else
#define CHECK_CMP_NAN(a, b) ((void)0) /* do nothing */
#endif
|
2007-08-06 15:36:30 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
|
|
|
double_cmp_lt(double a, double b)
|
|
|
|
{
|
|
|
|
CHECK_CMP_NAN(a, b);
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(a < b);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-08-06 15:36:30 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
|
|
|
double_cmp_le(double a, double b)
|
|
|
|
{
|
|
|
|
CHECK_CMP_NAN(a, b);
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(a <= b);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2007-08-06 15:36:30 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
|
|
|
double_cmp_gt(double a, double b)
|
|
|
|
{
|
|
|
|
CHECK_CMP_NAN(a, b);
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(a > b);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2012-06-11 07:14:59 +04:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static inline VALUE
|
|
|
|
double_cmp_ge(double a, double b)
|
|
|
|
{
|
|
|
|
CHECK_CMP_NAN(a, b);
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(a >= b);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2012-06-11 07:14:59 +04:00
|
|
|
|
2023-07-17 20:57:58 +03:00
|
|
|
// Copied by vm_dump.c
|
2019-03-25 17:26:11 +03:00
|
|
|
static inline VALUE *
|
2016-07-28 14:02:30 +03:00
|
|
|
vm_base_ptr(const rb_control_frame_t *cfp)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
2008-06-17 23:27:24 +04:00
|
|
|
|
2016-08-03 04:50:50 +03:00
|
|
|
if (cfp->iseq && VM_FRAME_RUBYFRAME_P(cfp)) {
|
2022-03-23 22:19:48 +03:00
|
|
|
VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE;
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
|
|
|
if (ISEQ_BODY(cfp->iseq)->param.flags.forwardable && VM_ENV_LOCAL_P(cfp->ep)) {
|
|
|
|
int lts = ISEQ_BODY(cfp->iseq)->local_table_size;
|
|
|
|
int params = ISEQ_BODY(cfp->iseq)->param.size;
|
|
|
|
|
|
|
|
CALL_INFO ci = (CALL_INFO)cfp->ep[-(VM_ENV_DATA_SIZE + (lts - params))]; // skip EP stuff, CI should be last local
|
|
|
|
bp += vm_ci_argc(ci);
|
|
|
|
}
|
|
|
|
|
2023-07-17 20:57:58 +03:00
|
|
|
if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) {
|
2016-07-28 14:02:30 +03:00
|
|
|
/* adjust `self' */
|
|
|
|
bp += 1;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
#if VM_DEBUG_BP_CHECK
|
2016-07-28 14:02:30 +03:00
|
|
|
if (bp != cfp->bp_check) {
|
2021-09-09 17:21:06 +03:00
|
|
|
ruby_debug_printf("bp_check: %ld, bp: %ld\n",
|
2017-10-26 17:44:09 +03:00
|
|
|
(long)(cfp->bp_check - GET_EC()->vm_stack),
|
|
|
|
(long)(bp - GET_EC()->vm_stack));
|
2016-07-28 14:02:30 +03:00
|
|
|
rb_bug("vm_base_ptr: unreachable");
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
#endif
|
2016-07-28 14:02:30 +03:00
|
|
|
return bp;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return NULL;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2008-06-17 23:27:24 +04:00
|
|
|
|
2023-08-09 02:06:22 +03:00
|
|
|
VALUE *
|
|
|
|
rb_vm_base_ptr(const rb_control_frame_t *cfp)
|
|
|
|
{
|
|
|
|
return vm_base_ptr(cfp);
|
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
/* method call processes with call_info */
|
2008-06-17 23:27:24 +04:00
|
|
|
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
For now, ISeq#to_a has not caught up with the new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction checks the availability of the corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function removes the latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
#include "vm_args.c"
|
2012-11-13 12:34:43 +04:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
static inline VALUE vm_call_iseq_setup_2(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, int opt_pc, int param_size, int local_size);
|
2020-11-27 09:41:15 +03:00
|
|
|
ALWAYS_INLINE(static VALUE vm_call_iseq_setup_normal(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, const rb_callable_method_entry_t *me, int opt_pc, int param_size, int local_size));
|
2020-12-14 23:40:38 +03:00
|
|
|
static inline VALUE vm_call_iseq_setup_tailcall(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, int opt_pc);
|
|
|
|
static VALUE vm_call_super_method(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling);
|
|
|
|
static VALUE vm_call_method_nome(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling);
|
|
|
|
static VALUE vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling);
|
|
|
|
static inline VALUE vm_call_method(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). In most cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
static vm_call_handler vm_call_iseq_setup_func(const struct rb_callinfo *ci, const int param_size, const int local_size);
|
2015-10-23 20:53:35 +03:00
|
|
|
|
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_iseq_setup_tailcall_0start(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2015-10-23 20:53:35 +03:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_setup_tailcall_0start);
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_iseq_setup_tailcall(ec, cfp, calling, 0);
|
2015-10-23 20:53:35 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_iseq_setup_normal_0start(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2015-10-23 20:53:35 +03:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_setup_0start);
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
2022-03-23 22:19:48 +03:00
|
|
|
int param = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local = ISEQ_BODY(iseq)->local_table_size;
|
2020-11-27 09:41:15 +03:00
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), 0, param, local);
|
2015-10-23 20:53:35 +03:00
|
|
|
}
|
|
|
|
|
2023-03-07 09:02:03 +03:00
|
|
|
bool
|
2018-02-05 18:49:32 +03:00
|
|
|
rb_simple_iseq_p(const rb_iseq_t *iseq)
|
2015-12-22 14:20:12 +03:00
|
|
|
{
|
2022-03-23 22:19:48 +03:00
|
|
|
return ISEQ_BODY(iseq)->param.flags.has_opt == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_rest == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_post == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kw == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kwrest == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.accepts_no_kwarg == FALSE &&
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.forwardable == FALSE &&
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.has_block == FALSE;
|
2015-12-22 14:20:12 +03:00
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
bool
|
2019-03-20 22:57:39 +03:00
|
|
|
rb_iseq_only_optparam_p(const rb_iseq_t *iseq)
|
|
|
|
{
|
2022-03-23 22:19:48 +03:00
|
|
|
return ISEQ_BODY(iseq)->param.flags.has_opt == TRUE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_rest == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_post == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kw == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kwrest == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.accepts_no_kwarg == FALSE &&
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.forwardable == FALSE &&
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.has_block == FALSE;
|
2019-03-20 22:57:39 +03:00
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
bool
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
rb_iseq_only_kwparam_p(const rb_iseq_t *iseq)
|
|
|
|
{
|
2022-03-23 22:19:48 +03:00
|
|
|
return ISEQ_BODY(iseq)->param.flags.has_opt == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_rest == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_post == FALSE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kw == TRUE &&
|
|
|
|
ISEQ_BODY(iseq)->param.flags.has_kwrest == FALSE &&
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.forwardable == FALSE &&
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.has_block == FALSE;
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
}
|
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that the same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
/*
 * Negative sentinel values for the max_args parameter of
 * vm_caller_setup_arg_splat().  When max_args is ALLOW_HEAP_ARGV or lower
 * (which includes ALLOW_HEAP_ARGV_KEEP_KWSPLAT) and the splatted array plus
 * the existing argc would exceed VM_ARGC_STACK_MAX, the arguments are stored
 * in a temporary array instead of on the VM stack.
 * NOTE(review): the _KEEP_KWSPLAT variant presumably also leaves a trailing
 * keyword-splat hash in place -- confirm against its callers.
 */
#define ALLOW_HEAP_ARGV (-2)
|
|
|
|
#define ALLOW_HEAP_ARGV_KEEP_KWSPLAT (-3)
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
vm_caller_setup_arg_splat(rb_control_frame_t *cfp, struct rb_calling_info *calling, VALUE ary, int max_args)
|
2020-04-14 06:32:59 +03:00
|
|
|
{
|
2023-01-13 11:52:59 +03:00
|
|
|
vm_check_canary(GET_EC(), cfp->sp);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that the same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
bool ret = false;
|
2023-01-13 11:52:59 +03:00
|
|
|
|
|
|
|
if (!NIL_P(ary)) {
|
2023-07-13 16:45:35 +03:00
|
|
|
const VALUE *ptr = RARRAY_CONST_PTR(ary);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that the same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
long len = RARRAY_LEN(ary);
|
|
|
|
int argc = calling->argc;
|
|
|
|
|
|
|
|
if (UNLIKELY(max_args <= ALLOW_HEAP_ARGV && len + argc > VM_ARGC_STACK_MAX)) {
|
|
|
|
/* Avoid SystemStackError when splatting large arrays by storing arguments in
|
|
|
|
* a temporary array, instead of trying to keeping arguments on the VM stack.
|
|
|
|
*/
|
|
|
|
VALUE *argv = cfp->sp - argc;
|
|
|
|
VALUE argv_ary = rb_ary_hidden_new(len + argc + 1);
|
|
|
|
rb_ary_cat(argv_ary, argv, argc);
|
|
|
|
rb_ary_cat(argv_ary, ptr, len);
|
|
|
|
cfp->sp -= argc - 1;
|
|
|
|
cfp->sp[-1] = argv_ary;
|
|
|
|
calling->argc = 1;
|
|
|
|
calling->heap_argv = argv_ary;
|
|
|
|
RB_GC_GUARD(ary);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
long i;
|
|
|
|
|
|
|
|
if (max_args >= 0 && len + argc > max_args) {
|
|
|
|
/* If only a given max_args is allowed, copy up to max args.
|
|
|
|
* Used by vm_callee_setup_block_arg for non-lambda blocks,
|
|
|
|
* where additional arguments are ignored.
|
|
|
|
*
|
|
|
|
* Also, copy up to one more argument than the maximum,
|
|
|
|
* in case it is an empty keyword hash that will be removed.
|
|
|
|
*/
|
|
|
|
calling->argc += len - (max_args - argc + 1);
|
|
|
|
len = max_args - argc + 1;
|
|
|
|
ret = true;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Unset heap_argv if set originally. Can happen when
|
|
|
|
* forwarding modified arguments, where heap_argv was used
|
|
|
|
* originally, but heap_argv not supported by the forwarded
|
|
|
|
* method in all cases.
|
|
|
|
*/
|
|
|
|
calling->heap_argv = 0;
|
|
|
|
}
|
|
|
|
CHECK_VM_STACK_OVERFLOW(cfp, len);
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
*cfp->sp++ = ptr[i];
|
|
|
|
}
|
|
|
|
calling->argc += i;
|
2023-01-13 11:52:59 +03:00
|
|
|
}
|
|
|
|
}
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
|
|
|
|
return ret;
|
2020-04-14 06:32:59 +03:00
|
|
|
}
|
|
|
|
|
2023-01-13 11:52:59 +03:00
|
|
|
static inline void
|
|
|
|
vm_caller_setup_arg_kw(rb_control_frame_t *cfp, struct rb_calling_info *calling, const struct rb_callinfo *ci)
|
|
|
|
{
|
|
|
|
const VALUE *const passed_keywords = vm_ci_kwarg(ci)->keywords;
|
|
|
|
const int kw_len = vm_ci_kwarg(ci)->keyword_len;
|
|
|
|
const VALUE h = rb_hash_new_with_size(kw_len);
|
|
|
|
VALUE *sp = cfp->sp;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i=0; i<kw_len; i++) {
|
|
|
|
rb_hash_aset(h, passed_keywords[i], (sp - kw_len)[i]);
|
|
|
|
}
|
|
|
|
(sp-kw_len)[0] = h;
|
|
|
|
|
|
|
|
cfp->sp -= kw_len - 1;
|
|
|
|
calling->argc -= kw_len - 1;
|
|
|
|
calling->kw_splat = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
|
|
|
vm_caller_setup_keyword_hash(const struct rb_callinfo *ci, VALUE keyword_hash)
|
|
|
|
{
|
|
|
|
if (UNLIKELY(!RB_TYPE_P(keyword_hash, T_HASH))) {
|
2024-01-14 22:41:02 +03:00
|
|
|
if (keyword_hash != Qnil) {
|
|
|
|
/* Convert a non-hash keyword splat to a new hash */
|
|
|
|
keyword_hash = rb_hash_dup(rb_to_hash_type(keyword_hash));
|
|
|
|
}
|
2023-01-13 11:52:59 +03:00
|
|
|
}
|
Avoid hash allocation for certain proc calls
Previously, proc calls such as:
```ruby
proc{|| }.(**empty_hash)
proc{|b: 1| }.(**r2k_array_with_empty_hash)
```
both allocated hashes unnecessarily, due to two separate code paths.
The first call goes through CALLER_SETUP_ARG/vm_caller_setup_keyword_hash,
and is simple to fix by not duping an empty keyword hash that will be
dropped.
The second case is more involved, in setup_parameters_complex, but is
fixed the exact same way as when the ruby2_keywords hash is not empty,
by flattening the rest array to the VM stack, ignoring the last
element (the empty keyword splat). Add a flatten_rest_array static
function to handle this case.
Update test_allocation.rb to automatically convert the method call
allocation tests to proc allocation tests, at least for the calls
that can be converted. With the code changes, all proc call
allocation tests pass, showing that proc calls and method calls
now allocate the same number of objects.
I've audited the allocation tests, and I believe that all of the low
hanging fruit has been collected. All remaining allocations are
either caller side:
* Positional splat + post argument
* Multiple positional splats
* Literal keywords + keyword splat
* Multiple keyword splats
Or callee side:
* Positional splat parameter
* Keyword splat parameter
* Keyword to positional argument conversion for methods that don't accept keywords
* ruby2_keywords method called with keywords
Reapplies abc04e898b627ab37fa9dd5e330f239768778d8b, which was reverted at
d56470a27c5a8a2e7aee7a76cea445c2d29c0c59, with the addition of a bug fix and
test.
Fixes [Bug #20679]
2024-08-20 05:00:37 +03:00
|
|
|
else if (!IS_ARGS_KW_SPLAT_MUT(ci) && !RHASH_EMPTY_P(keyword_hash)) {
|
2023-01-13 11:52:59 +03:00
|
|
|
/* Convert a hash keyword splat to a new hash unless
|
|
|
|
* a mutable keyword splat was passed.
|
Avoid hash allocation for certain proc calls
Previously, proc calls such as:
```ruby
proc{|| }.(**empty_hash)
proc{|b: 1| }.(**r2k_array_with_empty_hash)
```
both allocated hashes unnecessarily, due to two separate code paths.
The first call goes through CALLER_SETUP_ARG/vm_caller_setup_keyword_hash,
and is simple to fix by not duping an empty keyword hash that will be
dropped.
The second case is more involved, in setup_parameters_complex, but is
fixed the exact same way as when the ruby2_keywords hash is not empty,
by flattening the rest array to the VM stack, ignoring the last
element (the empty keyword splat). Add a flatten_rest_array static
function to handle this case.
Update test_allocation.rb to automatically convert the method call
allocation tests to proc allocation tests, at least for the calls
that can be converted. With the code changes, all proc call
allocation tests pass, showing that proc calls and method calls
now allocate the same number of objects.
I've audited the allocation tests, and I believe that all of the low
hanging fruit has been collected. All remaining allocations are
either caller side:
* Positional splat + post argument
* Multiple positional splats
* Literal keywords + keyword splat
* Multiple keyword splats
Or callee side:
* Positional splat parameter
* Keyword splat parameter
* Keyword to positional argument conversion for methods that don't accept keywords
* ruby2_keywords method called with keywords
Reapplies abc04e898b627ab37fa9dd5e330f239768778d8b, which was reverted at
d56470a27c5a8a2e7aee7a76cea445c2d29c0c59, with the addition of a bug fix and
test.
Fixes [Bug #20679]
2024-08-20 05:00:37 +03:00
|
|
|
* Skip allocating new hash for empty keyword splat, as empty
|
|
|
|
* keyword splat will be ignored by both callers.
|
2023-01-13 11:52:59 +03:00
|
|
|
*/
|
|
|
|
keyword_hash = rb_hash_dup(keyword_hash);
|
|
|
|
}
|
|
|
|
return keyword_hash;
|
|
|
|
}
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on; in other words,
kw->default_values does not contain Qundef) and there are no required kw
parameters (because the caller doesn't pass any keyword arguments).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
2019-01-14 07:49:28 +03:00
|
|
|
static inline void
|
Add Module#ruby2_keywords for passing keywords through regular argument splats
This approach uses a flag bit on the final hash object in the regular splat,
as opposed to a previous approach that used a VM frame flag. The hash flag
approach is less invasive, and handles some cases that the VM frame flag
approach does not, such as saving the argument splat array and splatting it
later:
ruby2_keywords def foo(*args)
@args = args
bar
end
def bar
baz(*@args)
end
def baz(*args, **kw)
[args, kw]
end
foo(a:1) #=> [[], {a: 1}]
foo({a: 1}, **{}) #=> [[{a: 1}], {}]
foo({a: 1}) #=> 2.7: [[], {a: 1}] # and warning
foo({a: 1}) #=> 3.0: [[{a: 1}], {}]
It doesn't handle some cases that the VM frame flag handles, such as when
the final hash object is replaced using Hash#merge, but those cases are
probably less common and are unlikely to properly support keyword
argument separation.
Use ruby2_keywords to handle argument delegation in the delegate library.
2019-09-21 19:03:36 +03:00
|
|
|
CALLER_SETUP_ARG(struct rb_control_frame_struct *restrict cfp,
|
|
|
|
struct rb_calling_info *restrict calling,
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
const struct rb_callinfo *restrict ci, int max_args)
|
2018-12-28 04:06:04 +03:00
|
|
|
{
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(IS_ARGS_SPLAT(ci))) {
|
|
|
|
if (IS_ARGS_KW_SPLAT(ci)) {
|
|
|
|
// f(*a, **kw)
|
|
|
|
VM_ASSERT(calling->kw_splat == 1);
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
cfp->sp -= 2;
|
|
|
|
calling->argc -= 2;
|
|
|
|
VALUE ary = cfp->sp[0];
|
|
|
|
VALUE kwh = vm_caller_setup_keyword_hash(ci, cfp->sp[1]);
|
|
|
|
|
|
|
|
// splat a
|
|
|
|
if (vm_caller_setup_arg_splat(cfp, calling, ary, max_args)) return;
|
|
|
|
|
|
|
|
// put kw
|
2024-01-14 22:41:02 +03:00
|
|
|
if (kwh != Qnil && !RHASH_EMPTY_P(kwh)) {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(calling->heap_argv)) {
|
|
|
|
rb_ary_push(calling->heap_argv, kwh);
|
|
|
|
((struct RHash *)kwh)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
|
|
|
|
if (max_args != ALLOW_HEAP_ARGV_KEEP_KWSPLAT) {
|
|
|
|
calling->kw_splat = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cfp->sp[0] = kwh;
|
|
|
|
cfp->sp++;
|
|
|
|
calling->argc++;
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
VM_ASSERT(calling->kw_splat == 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
calling->kw_splat = 0;
|
|
|
|
}
|
Reduce allocations for keyword argument hashes
Previously, passing a keyword splat to a method always allocated
a hash on the caller side, and accepting arbitrary keywords in
a method allocated a separate hash on the callee side. Passing
explicit keywords to a method that accepted a keyword splat
did not allocate a hash on the caller side, but resulted in two
hashes allocated on the callee side.
This commit makes passing a single keyword splat to a method not
allocate a hash on the caller side. Passing multiple keyword
splats or a mix of explicit keywords and a keyword splat still
generates a hash on the caller side. On the callee side,
if arbitrary keywords are not accepted, it does not allocate a
hash. If arbitrary keywords are accepted, it will allocate a
hash, but this commit uses a callinfo flag to indicate whether
the caller already allocated a hash, and if so, the callee can
use the passed hash without duplicating it. So this commit
should make it so that a maximum of a single hash is allocated
during method calls.
To set the callinfo flag appropriately, method call argument
compilation checks if only a single keyword splat is given.
If only one keyword splat is given, the VM_CALL_KW_SPLAT_MUT
callinfo flag is not set, since in that case the keyword
splat is passed directly and not mutable. If more than one
splat is used, a new hash needs to be generated on the caller
side, and in that case the callinfo flag is set, indicating
the keyword splat is mutable by the callee.
In compile_hash, used for both hash and keyword argument
compilation, if compiling keyword arguments and only a
single keyword splat is used, pass the argument directly.
On the caller side, in vm_args.c, the callinfo flag needs to
be recognized and handled. Because the keyword splat
argument may not be a hash, it needs to be converted to a
hash first if not. Then, unless the callinfo flag is set,
the hash needs to be duplicated. The temporary copy of the
callinfo flag, kw_flag, is updated if a hash was duplicated,
to prevent the need to duplicate it again. If we are
converting to a hash or duplicating a hash, we need to update
the argument array, which can include duplicating the
positional splat array if one was passed. CALLER_SETUP_ARG
and a couple of other places need to be modified to handle
similar issues for other types of calls.
This includes fairly comprehensive tests for different ways
keywords are handled internally, checking that you get equal
results but that keyword splats on the caller side result in
distinct objects for keyword rest parameters.
Included are benchmarks for keyword argument calls.
Brief results when compiled without optimization:
def kw(a: 1) a end
def kws(**kw) kw end
h = {a: 1}
kw(a: 1) # about same
kw(**h) # 2.37x faster
kws(a: 1) # 1.30x faster
kws(**h) # 2.19x faster
kw(a: 1, **h) # 1.03x slower
kw(**h, **h) # about same
kws(a: 1, **h) # 1.16x faster
kws(**h, **h) # 1.14x faster
2020-02-24 23:05:07 +03:00
|
|
|
}
|
|
|
|
else {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
// f(*a)
|
|
|
|
VM_ASSERT(calling->kw_splat == 0);
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
cfp->sp -= 1;
|
|
|
|
calling->argc -= 1;
|
|
|
|
VALUE ary = cfp->sp[0];
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (vm_caller_setup_arg_splat(cfp, calling, ary, max_args)) {
|
|
|
|
goto check_keyword;
|
|
|
|
}
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
// check the last argument
|
|
|
|
VALUE last_hash, argv_ary;
|
|
|
|
if (UNLIKELY(argv_ary = calling->heap_argv)) {
|
|
|
|
if (!IS_ARGS_KEYWORD(ci) &&
|
|
|
|
RARRAY_LEN(argv_ary) > 0 &&
|
|
|
|
RB_TYPE_P((last_hash = rb_ary_last(0, NULL, argv_ary)), T_HASH) &&
|
|
|
|
(((struct RHash *)last_hash)->basic.flags & RHASH_PASS_AS_KEYWORDS)) {
|
2023-01-13 11:52:59 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
rb_ary_pop(argv_ary);
|
|
|
|
if (!RHASH_EMPTY_P(last_hash)) {
|
|
|
|
rb_ary_push(argv_ary, rb_hash_dup(last_hash));
|
|
|
|
calling->kw_splat = 1;
|
|
|
|
}
|
|
|
|
}
|
Reduce allocations for keyword argument hashes
Previously, passing a keyword splat to a method always allocated
a hash on the caller side, and accepting arbitrary keywords in
a method allocated a separate hash on the callee side. Passing
explicit keywords to a method that accepted a keyword splat
did not allocate a hash on the caller side, but resulted in two
hashes allocated on the callee side.
This commit makes passing a single keyword splat to a method not
allocate a hash on the caller side. Passing multiple keyword
splats or a mix of explicit keywords and a keyword splat still
generates a hash on the caller side. On the callee side,
if arbitrary keywords are not accepted, it does not allocate a
hash. If arbitrary keywords are accepted, it will allocate a
hash, but this commit uses a callinfo flag to indicate whether
the caller already allocated a hash, and if so, the callee can
use the passed hash without duplicating it. So this commit
should make it so that a maximum of a single hash is allocated
during method calls.
To set the callinfo flag appropriately, method call argument
compilation checks if only a single keyword splat is given.
If only one keyword splat is given, the VM_CALL_KW_SPLAT_MUT
callinfo flag is not set, since in that case the keyword
splat is passed directly and not mutable. If more than one
splat is used, a new hash needs to be generated on the caller
side, and in that case the callinfo flag is set, indicating
the keyword splat is mutable by the callee.
In compile_hash, used for both hash and keyword argument
compilation, if compiling keyword arguments and only a
single keyword splat is used, pass the argument directly.
On the caller side, in vm_args.c, the callinfo flag needs to
be recognized and handled. Because the keyword splat
argument may not be a hash, it needs to be converted to a
hash first if not. Then, unless the callinfo flag is set,
the hash needs to be duplicated. The temporary copy of the
callinfo flag, kw_flag, is updated if a hash was duplicated,
to prevent the need to duplicate it again. If we are
converting to a hash or duplicating a hash, we need to update
the argument array, which can include duplicating the
positional splat array if one was passed. CALLER_SETUP_ARG
and a couple of other places need to be modified to handle
similar issues for other types of calls.
This includes fairly comprehensive tests for different ways
keywords are handled internally, checking that you get equal
results but that keyword splats on the caller side result in
distinct objects for keyword rest parameters.
Included are benchmarks for keyword argument calls.
Brief results when compiled without optimization:
def kw(a: 1) a end
def kws(**kw) kw end
h = {a: 1}
kw(a: 1) # about same
kw(**h) # 2.37x faster
kws(a: 1) # 1.30x faster
kws(**h) # 2.19x faster
kw(a: 1, **h) # 1.03x slower
kw(**h, **h) # about same
kws(a: 1, **h) # 1.16x faster
kws(**h, **h) # 1.14x faster
2020-02-24 23:05:07 +03:00
|
|
|
}
|
2023-01-13 11:52:59 +03:00
|
|
|
else {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX set to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
check_keyword:
|
|
|
|
if (!IS_ARGS_KEYWORD(ci) &&
|
|
|
|
calling->argc > 0 &&
|
|
|
|
RB_TYPE_P((last_hash = cfp->sp[-1]), T_HASH) &&
|
|
|
|
(((struct RHash *)last_hash)->basic.flags & RHASH_PASS_AS_KEYWORDS)) {
|
|
|
|
|
|
|
|
if (RHASH_EMPTY_P(last_hash)) {
|
|
|
|
calling->argc--;
|
|
|
|
cfp->sp -= 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cfp->sp[-1] = rb_hash_dup(last_hash);
|
|
|
|
calling->kw_splat = 1;
|
|
|
|
}
|
|
|
|
}
|
Reduce allocations for keyword argument hashes
Previously, passing a keyword splat to a method always allocated
a hash on the caller side, and accepting arbitrary keywords in
a method allocated a separate hash on the callee side. Passing
explicit keywords to a method that accepted a keyword splat
did not allocate a hash on the caller side, but resulted in two
hashes allocated on the callee side.
This commit makes passing a single keyword splat to a method not
allocate a hash on the caller side. Passing multiple keyword
splats or a mix of explicit keywords and a keyword splat still
generates a hash on the caller side. On the callee side,
if arbitrary keywords are not accepted, it does not allocate a
hash. If arbitrary keywords are accepted, it will allocate a
hash, but this commit uses a callinfo flag to indicate whether
the caller already allocated a hash, and if so, the callee can
use the passed hash without duplicating it. So this commit
should make it so that a maximum of a single hash is allocated
during method calls.
To set the callinfo flag appropriately, method call argument
compilation checks if only a single keyword splat is given.
If only one keyword splat is given, the VM_CALL_KW_SPLAT_MUT
callinfo flag is not set, since in that case the keyword
splat is passed directly and not mutable. If more than one
splat is used, a new hash needs to be generated on the caller
side, and in that case the callinfo flag is set, indicating
the keyword splat is mutable by the callee.
In compile_hash, used for both hash and keyword argument
compilation, if compiling keyword arguments and only a
single keyword splat is used, pass the argument directly.
On the caller side, in vm_args.c, the callinfo flag needs to
be recognized and handled. Because the keyword splat
argument may not be a hash, it needs to be converted to a
hash first if not. Then, unless the callinfo flag is set,
the hash needs to be duplicated. The temporary copy of the
callinfo flag, kw_flag, is updated if a hash was duplicated,
to prevent the need to duplicate it again. If we are
converting to a hash or duplicating a hash, we need to update
the argument array, which can include duplicating the
positional splat array if one was passed. CALLER_SETUP_ARG
and a couple of other places need to be modified to handle
similar issues for other types of calls.
This includes fairly comprehensive tests for different ways
keywords are handled internally, checking that you get equal
results but that keyword splats on the caller side result in
distinct objects for keyword rest parameters.
Included are benchmarks for keyword argument calls.
Brief results when compiled without optimization:
def kw(a: 1) a end
def kws(**kw) kw end
h = {a: 1}
kw(a: 1) # about same
kw(**h) # 2.37x faster
kws(a: 1) # 1.30x faster
kws(**h) # 2.19x faster
kw(a: 1, **h) # 1.03x slower
kw(**h, **h) # about same
kws(a: 1, **h) # 1.16x faster
kws(**h, **h) # 1.14x faster
2020-02-24 23:05:07 +03:00
|
|
|
}
|
|
|
|
}
|
2018-12-28 04:06:04 +03:00
|
|
|
}
|
2023-01-13 11:52:59 +03:00
|
|
|
else if (UNLIKELY(IS_ARGS_KW_SPLAT(ci))) {
|
|
|
|
// f(**kw)
|
|
|
|
VM_ASSERT(calling->kw_splat == 1);
|
|
|
|
VALUE kwh = vm_caller_setup_keyword_hash(ci, cfp->sp[-1]);
|
2019-09-05 22:25:14 +03:00
|
|
|
|
2024-01-14 22:41:02 +03:00
|
|
|
if (kwh == Qnil || RHASH_EMPTY_P(kwh)) {
|
2019-09-05 10:27:26 +03:00
|
|
|
cfp->sp--;
|
|
|
|
calling->argc--;
|
|
|
|
calling->kw_splat = 0;
|
|
|
|
}
|
2023-01-13 11:52:59 +03:00
|
|
|
else {
|
|
|
|
cfp->sp[-1] = kwh;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (UNLIKELY(IS_ARGS_KEYWORD(ci))) {
|
|
|
|
// f(k1:1, k2:2)
|
|
|
|
VM_ASSERT(calling->kw_splat == 0);
|
|
|
|
|
|
|
|
/* This converts VM_CALL_KWARG style to VM_CALL_KW_SPLAT style
|
|
|
|
* by creating a keyword hash.
|
|
|
|
* So, vm_ci_flag(ci) & VM_CALL_KWARG is now inconsistent.
|
|
|
|
*/
|
|
|
|
vm_caller_setup_arg_kw(cfp, calling, ci);
|
2019-09-05 10:27:26 +03:00
|
|
|
}
|
2018-12-28 04:06:04 +03:00
|
|
|
}
|
|
|
|
|
2019-03-20 22:57:39 +03:00
|
|
|
/*
 * Optional-argument dispatch histogram (debugging aid, disabled by default).
 *
 * When USE_OPT_HIST is non-zero, the *_opt_start call handlers in this file
 * increment opt_hist[] on every call and the totals are printed when the
 * process exits.  Slots 0 .. OPT_HIST_MAX-1 are the regular buckets; slot
 * OPT_HIST_MAX is the overflow bucket for everything that does not fit.
 */
#define USE_OPT_HIST 0

#if USE_OPT_HIST
#define OPT_HIST_MAX 64
static int opt_hist[OPT_HIST_MAX+1];

/*
 * Registered with the destructor attribute; prints one
 * "opt_hist\t<slot>\t<count>" line per histogram slot.
 */
__attribute__((destructor))
static void
opt_hist_show_results_at_exit(void)
{
    /* Use `<=` so that the overflow slot opt_hist[OPT_HIST_MAX] is reported
     * as well; with `<` its count was collected but never shown. */
    for (int i=0; i<=OPT_HIST_MAX; i++) {
        ruby_debug_printf("opt_hist\t%d\t%d\n", i, opt_hist[i]);
    }
}
#endif
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_setup_normal_opt_start(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
2020-12-14 23:40:38 +03:00
|
|
|
struct rb_calling_info *calling)
|
2019-03-20 22:57:39 +03:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
2022-03-23 22:19:48 +03:00
|
|
|
const int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
2019-03-20 22:57:39 +03:00
|
|
|
const int opt = calling->argc - lead_num;
|
2022-03-23 22:19:48 +03:00
|
|
|
const int opt_num = ISEQ_BODY(iseq)->param.opt_num;
|
|
|
|
const int opt_pc = (int)ISEQ_BODY(iseq)->param.opt_table[opt];
|
|
|
|
const int param = ISEQ_BODY(iseq)->param.size;
|
|
|
|
const int local = ISEQ_BODY(iseq)->local_table_size;
|
2019-03-20 22:57:39 +03:00
|
|
|
const int delta = opt_num - opt;
|
|
|
|
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_opt);
|
|
|
|
|
2019-03-20 22:57:39 +03:00
|
|
|
#if USE_OPT_HIST
|
|
|
|
if (opt_pc < OPT_HIST_MAX) {
|
|
|
|
opt_hist[opt]++;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opt_hist[OPT_HIST_MAX]++;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-11-27 09:41:15 +03:00
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), opt_pc, param - delta, local);
|
2019-03-20 22:57:39 +03:00
|
|
|
}
|
|
|
|
|
2019-10-24 22:40:39 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_setup_tailcall_opt_start(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
2020-12-14 23:40:38 +03:00
|
|
|
struct rb_calling_info *calling)
|
2019-10-24 22:40:39 +03:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
2022-03-23 22:19:48 +03:00
|
|
|
const int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
2019-10-24 22:40:39 +03:00
|
|
|
const int opt = calling->argc - lead_num;
|
2022-03-23 22:19:48 +03:00
|
|
|
const int opt_pc = (int)ISEQ_BODY(iseq)->param.opt_table[opt];
|
2019-10-24 22:40:39 +03:00
|
|
|
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_opt);
|
|
|
|
|
|
|
|
#if USE_OPT_HIST
|
|
|
|
if (opt_pc < OPT_HIST_MAX) {
|
|
|
|
opt_hist[opt]++;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opt_hist[OPT_HIST_MAX]++;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_iseq_setup_tailcall(ec, cfp, calling, opt_pc);
|
2019-10-24 22:40:39 +03:00
|
|
|
}
|
|
|
|
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
static void
|
|
|
|
args_setup_kw_parameters(rb_execution_context_t *const ec, const rb_iseq_t *const iseq,
|
|
|
|
VALUE *const passed_values, const int passed_keyword_len, const VALUE *const passed_keywords,
|
2019-03-22 03:21:43 +03:00
|
|
|
VALUE *const locals);
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
2024-04-25 01:42:40 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_forwardable(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
|
|
|
struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
const struct rb_callcache *cc = calling->cc;
|
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
|
|
|
int param_size = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local_size = ISEQ_BODY(iseq)->local_table_size;
|
|
|
|
|
|
|
|
// Setting up local size and param size
|
|
|
|
VM_ASSERT(ISEQ_BODY(iseq)->param.flags.forwardable);
|
|
|
|
|
|
|
|
local_size = local_size + vm_ci_argc(calling->cd->ci);
|
|
|
|
param_size = param_size + vm_ci_argc(calling->cd->ci);
|
|
|
|
|
|
|
|
cfp->sp[0] = (VALUE)calling->cd->ci;
|
|
|
|
|
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), 0, param_size, local_size);
|
|
|
|
}
|
|
|
|
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For cases (1) and (2), provide special dispatchers. For case (2),
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on; in other words,
kw->default_values does not contain Qundef) and there are no required
kw parameters (because the caller does not pass any keyword arguments).
Passing keyword arguments with a hash object is out of scope for
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_setup_kwparm_kwarg(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
2020-12-14 23:40:38 +03:00
|
|
|
struct rb_calling_info *calling)
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
VM_ASSERT(vm_ci_flag(ci) & VM_CALL_KWARG);
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_kw1);
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
2022-03-23 22:19:48 +03:00
|
|
|
const struct rb_iseq_param_keyword *kw_param = ISEQ_BODY(iseq)->param.keyword;
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
const int ci_kw_len = kw_arg->keyword_len;
|
|
|
|
const VALUE * const ci_keywords = kw_arg->keywords;
|
|
|
|
VALUE *argv = cfp->sp - calling->argc;
|
|
|
|
VALUE *const klocals = argv + kw_param->bits_start - kw_param->num;
|
2022-03-23 22:19:48 +03:00
|
|
|
const int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
VALUE * const ci_kws = ALLOCA_N(VALUE, ci_kw_len);
|
|
|
|
MEMCPY(ci_kws, argv + lead_num, VALUE, ci_kw_len);
|
|
|
|
args_setup_kw_parameters(ec, iseq, ci_kws, ci_kw_len, ci_keywords, klocals);
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
int param = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local = ISEQ_BODY(iseq)->local_table_size;
|
2020-11-27 09:41:15 +03:00
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), 0, param, local);
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_setup_kwparm_nokwarg(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
2020-12-14 23:40:38 +03:00
|
|
|
struct rb_calling_info *calling)
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *MAYBE_UNUSED(ci) = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
VM_ASSERT((vm_ci_flag(ci) & VM_CALL_KWARG) == 0);
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_kw2);
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
2022-03-23 22:19:48 +03:00
|
|
|
const struct rb_iseq_param_keyword *kw_param = ISEQ_BODY(iseq)->param.keyword;
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
VALUE * const argv = cfp->sp - calling->argc;
|
|
|
|
VALUE * const klocals = argv + kw_param->bits_start - kw_param->num;
|
|
|
|
|
2019-11-09 03:59:32 +03:00
|
|
|
int i;
|
|
|
|
for (i=0; i<kw_param->num; i++) {
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
klocals[i] = kw_param->default_values[i];
|
|
|
|
}
|
2019-11-09 03:59:32 +03:00
|
|
|
klocals[i] = INT2FIX(0); // kw specify flag
|
2019-11-09 20:25:57 +03:00
|
|
|
// NOTE:
|
|
|
|
// nobody check this value, but it should be cleared because it can
|
|
|
|
// points invalid VALUE (T_NONE objects, raw pointer and so on).
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
int param = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local = ISEQ_BODY(iseq)->local_table_size;
|
2020-11-27 09:41:15 +03:00
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), 0, param, local);
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contains Qundef) (and no required kw
parameters becaseu they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
}
|
|
|
|
|
2023-03-09 19:30:30 +03:00
|
|
|
static VALUE builtin_invoker0(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr);
|
|
|
|
|
|
|
|
static VALUE
|
2024-01-17 04:31:26 +03:00
|
|
|
vm_call_single_noarg_leaf_builtin(rb_execution_context_t *ec, rb_control_frame_t *cfp,
|
2023-03-09 19:30:30 +03:00
|
|
|
struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
const struct rb_builtin_function *bf = calling->cc->aux_.bf;
|
|
|
|
cfp->sp -= (calling->argc + 1);
|
2024-10-10 05:29:57 +03:00
|
|
|
rb_insn_func_t func_ptr = (rb_insn_func_t)(uintptr_t)bf->func_ptr;
|
|
|
|
return builtin_invoker0(ec, calling->recv, NULL, func_ptr);
|
2023-03-09 19:30:30 +03:00
|
|
|
}
|
|
|
|
|
2024-03-27 01:29:38 +03:00
|
|
|
VALUE rb_gen_method_name(VALUE owner, VALUE name); // in vm_backtrace.c
|
|
|
|
|
|
|
|
static void
|
|
|
|
warn_unused_block(const rb_callable_method_entry_t *cme, const rb_iseq_t *iseq, void *pc)
|
|
|
|
{
|
2024-04-17 13:46:48 +03:00
|
|
|
rb_vm_t *vm = GET_VM();
|
|
|
|
st_table *dup_check_table = vm->unused_block_warning_table;
|
2024-04-19 07:21:55 +03:00
|
|
|
st_data_t key;
|
2024-03-27 01:29:38 +03:00
|
|
|
|
|
|
|
union {
|
|
|
|
VALUE v;
|
|
|
|
unsigned char b[SIZEOF_VALUE];
|
|
|
|
} k1 = {
|
|
|
|
.v = (VALUE)pc,
|
|
|
|
}, k2 = {
|
|
|
|
.v = (VALUE)cme->def,
|
|
|
|
};
|
|
|
|
|
2024-04-17 13:46:48 +03:00
|
|
|
// relax check
|
2024-04-19 07:21:55 +03:00
|
|
|
if (!vm->unused_block_warning_strict) {
|
|
|
|
key = (st_data_t)cme->def->original_id;
|
2024-04-17 13:46:48 +03:00
|
|
|
|
2024-04-19 07:21:55 +03:00
|
|
|
if (st_lookup(dup_check_table, key, NULL)) {
|
|
|
|
return;
|
|
|
|
}
|
2024-04-17 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// strict check
|
2024-03-27 01:29:38 +03:00
|
|
|
// make unique key from pc and me->def pointer
|
2024-04-19 07:21:55 +03:00
|
|
|
key = 0;
|
2024-03-27 01:29:38 +03:00
|
|
|
for (int i=0; i<SIZEOF_VALUE; i++) {
|
|
|
|
// fprintf(stderr, "k1:%3d k2:%3d\n", k1.b[i], k2.b[SIZEOF_VALUE-1-i]);
|
|
|
|
key |= (st_data_t)(k1.b[i] ^ k2.b[SIZEOF_VALUE-1-i]) << (8 * i);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (0) {
|
|
|
|
fprintf(stderr, "SIZEOF_VALUE:%d\n", SIZEOF_VALUE);
|
2024-05-29 03:44:07 +03:00
|
|
|
fprintf(stderr, "pc:%p def:%p\n", pc, (void *)cme->def);
|
2024-03-27 01:29:38 +03:00
|
|
|
fprintf(stderr, "key:%p\n", (void *)key);
|
|
|
|
}
|
|
|
|
|
|
|
|
// duplication check
|
|
|
|
if (st_insert(dup_check_table, key, 1)) {
|
|
|
|
// already shown
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
VALUE m_loc = rb_method_entry_location((const rb_method_entry_t *)cme);
|
|
|
|
VALUE name = rb_gen_method_name(cme->defined_class, ISEQ_BODY(iseq)->location.base_label);
|
|
|
|
|
|
|
|
if (!NIL_P(m_loc)) {
|
2024-04-17 12:46:01 +03:00
|
|
|
rb_warning("the block passed to '%"PRIsVALUE"' defined at %"PRIsVALUE":%"PRIsVALUE" may be ignored",
|
2024-03-27 01:29:38 +03:00
|
|
|
name, RARRAY_AREF(m_loc, 0), RARRAY_AREF(m_loc, 1));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_warning("the block may be ignored because '%"PRIsVALUE"' does not use a block", name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-11 21:25:54 +03:00
|
|
|
static inline int
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_callee_setup_arg(rb_execution_context_t *ec, struct rb_calling_info *calling,
|
2015-10-23 20:53:35 +03:00
|
|
|
const rb_iseq_t *iseq, VALUE *argv, int param_size, int local_size)
|
2014-03-12 06:18:50 +04:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
2024-06-20 17:56:03 +03:00
|
|
|
VM_ASSERT((vm_ci_argc(ci), 1));
|
|
|
|
VM_ASSERT(vm_cc_cme(cc) != NULL);
|
|
|
|
|
2024-03-27 01:29:38 +03:00
|
|
|
if (UNLIKELY(!ISEQ_BODY(iseq)->param.flags.use_block &&
|
|
|
|
calling->block_handler != VM_BLOCK_HANDLER_NONE &&
|
2024-08-09 10:43:02 +03:00
|
|
|
!(vm_ci_flag(calling->cd->ci) & (VM_CALL_OPT_SEND | VM_CALL_SUPER)))) {
|
2024-03-27 01:29:38 +03:00
|
|
|
warn_unused_block(vm_cc_cme(cc), iseq, (void *)ec->cfp->pc);
|
|
|
|
}
|
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (LIKELY(!(vm_ci_flag(ci) & VM_CALL_KW_SPLAT))) {
|
2019-03-20 22:57:39 +03:00
|
|
|
if (LIKELY(rb_simple_iseq_p(iseq))) {
|
|
|
|
rb_control_frame_t *cfp = ec->cfp;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, lead_num);
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (calling->argc != lead_num) {
|
|
|
|
argument_arity_error(ec, iseq, calling->argc, lead_num, lead_num);
|
2019-03-20 22:57:39 +03:00
|
|
|
}
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
//VM_ASSERT(ci == calling->cd->ci);
|
2020-12-14 23:40:38 +03:00
|
|
|
VM_ASSERT(cc == calling->cc);
|
2023-03-09 19:30:30 +03:00
|
|
|
|
2024-04-24 23:39:39 +03:00
|
|
|
if (vm_call_iseq_optimizable_p(ci, cc)) {
|
2024-01-17 04:31:26 +03:00
|
|
|
if ((iseq->body->builtin_attrs & BUILTIN_ATTR_SINGLE_NOARG_LEAF) &&
|
2023-03-09 19:30:30 +03:00
|
|
|
!(ruby_vm_event_flags & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN))) {
|
|
|
|
VM_ASSERT(iseq->body->builtin_attrs & BUILTIN_ATTR_LEAF);
|
|
|
|
vm_cc_bf_set(cc, (void *)iseq->body->iseq_encoded[1]);
|
2024-01-17 04:31:26 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_single_noarg_leaf_builtin, true);
|
2023-03-09 19:30:30 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup_func(ci, param_size, local_size), true);
|
|
|
|
}
|
|
|
|
}
|
2019-03-20 22:57:39 +03:00
|
|
|
return 0;
|
|
|
|
}
|
2019-03-20 23:37:03 +03:00
|
|
|
else if (rb_iseq_only_optparam_p(iseq)) {
|
2019-03-20 22:57:39 +03:00
|
|
|
rb_control_frame_t *cfp = ec->cfp;
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
const int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
|
|
|
const int opt_num = ISEQ_BODY(iseq)->param.opt_num;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
|
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, lead_num + opt_num);
|
2019-03-20 22:57:39 +03:00
|
|
|
const int argc = calling->argc;
|
|
|
|
const int opt = argc - lead_num;
|
|
|
|
|
|
|
|
if (opt < 0 || opt > opt_num) {
|
|
|
|
argument_arity_error(ec, iseq, argc, lead_num, lead_num + opt_num);
|
|
|
|
}
|
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (LIKELY(!(vm_ci_flag(ci) & VM_CALL_TAILCALL))) {
|
2019-10-24 22:40:39 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup_normal_opt_start,
|
|
|
|
!IS_ARGS_SPLAT(ci) && !IS_ARGS_KEYWORD(ci) &&
|
2024-04-24 23:39:39 +03:00
|
|
|
vm_call_cacheable(ci, cc));
|
2019-10-24 22:40:39 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup_tailcall_opt_start,
|
|
|
|
!IS_ARGS_SPLAT(ci) && !IS_ARGS_KEYWORD(ci) &&
|
2024-04-24 23:39:39 +03:00
|
|
|
vm_call_cacheable(ci, cc));
|
2019-10-24 22:40:39 +03:00
|
|
|
}
|
2019-03-20 22:57:39 +03:00
|
|
|
|
|
|
|
/* initialize opt vars for self-references */
|
2022-03-23 22:19:48 +03:00
|
|
|
VM_ASSERT((int)ISEQ_BODY(iseq)->param.size == lead_num + opt_num);
|
2019-03-20 22:57:39 +03:00
|
|
|
for (int i=argc; i<lead_num + opt_num; i++) {
|
|
|
|
argv[i] = Qnil;
|
|
|
|
}
|
2022-03-23 22:19:48 +03:00
|
|
|
return (int)ISEQ_BODY(iseq)->param.opt_table[opt];
|
2019-03-20 22:57:39 +03:00
|
|
|
}
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
else if (rb_iseq_only_kwparam_p(iseq) && !IS_ARGS_SPLAT(ci)) {
|
2022-03-23 22:19:48 +03:00
|
|
|
const int lead_num = ISEQ_BODY(iseq)->param.lead_num;
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
const int argc = calling->argc;
|
2022-03-23 22:19:48 +03:00
|
|
|
const struct rb_iseq_param_keyword *kw_param = ISEQ_BODY(iseq)->param.keyword;
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (vm_ci_flag(ci) & VM_CALL_KWARG) {
|
|
|
|
const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For (1) and (2) cases, provide special dispatchers. For (2) case,
this patch only use the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on. In other case,
kw->default_values does not contain Qundef) (and no required kw
parameters because they don't pass any keyword parameters).
Passing keyword arguments with a hash object is not a scope of
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
|
|
|
if (argc - kw_arg->keyword_len == lead_num) {
|
|
|
|
const int ci_kw_len = kw_arg->keyword_len;
|
|
|
|
const VALUE * const ci_keywords = kw_arg->keywords;
|
|
|
|
VALUE * const ci_kws = ALLOCA_N(VALUE, ci_kw_len);
|
|
|
|
MEMCPY(ci_kws, argv + lead_num, VALUE, ci_kw_len);
|
|
|
|
|
|
|
|
VALUE *const klocals = argv + kw_param->bits_start - kw_param->num;
|
|
|
|
args_setup_kw_parameters(ec, iseq, ci_kws, ci_kw_len, ci_keywords, klocals);
|
|
|
|
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup_kwparm_kwarg,
|
2024-04-24 23:39:39 +03:00
|
|
|
vm_call_cacheable(ci, cc));
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For cases (1) and (2), provide special dispatchers. For case (2),
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on; in other words,
kw->default_values does not contain Qundef) and there are no required kw
parameters, because the call does not pass any keyword arguments.
Passing keyword arguments with a hash object is out of scope for
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (argc == lead_num) {
|
|
|
|
/* no kwarg */
|
|
|
|
VALUE *const klocals = argv + kw_param->bits_start - kw_param->num;
|
|
|
|
args_setup_kw_parameters(ec, iseq, NULL, 0, NULL, klocals);
|
|
|
|
|
|
|
|
if (klocals[kw_param->num] == INT2FIX(0)) {
|
|
|
|
/* copy from default_values */
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup_kwparm_nokwarg,
|
2024-04-24 23:39:39 +03:00
|
|
|
vm_call_cacheable(ci, cc));
|
optimize method dispatch for lead/kw params.
similar idea to r67315, provide the following optimization
for method dispatch with lead and kw parameters.
(1) add a special branch to check passing kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0, k:1)
(2) add a special branch to check passing no-kw arguments to
a method which has lead and kw parameters.
ex) def foo(x, k:1); end; foo(0)
For cases (1) and (2), provide special dispatchers. For case (2),
this patch only uses the special dispatcher if all default
kw parameters are literal values (nil, 1, and so on; in other words,
kw->default_values does not contain Qundef) and there are no required kw
parameters, because the call does not pass any keyword arguments.
Passing keyword arguments with a hash object is out of scope for
this patch.
Without this patch, (1) and (2) cases use `setup_parameters_complex()`.
Especially, (2) seems frequent case for methods which extend a normal
usecase with keyword parameters (like: `exception: true`).
We can measure the performance with benchmark-driver:
With methods: def kw k1:1, k2:2; end
def m; end
With the following binaries:
clean-miniruby: unmodified trunk.
opt_miniruby1: use special branches for lead/kw parameters.
opt_miniruby2: use special dispatchers for lead/kw parameters.
opt_cc_miniruby: apply step (2).
Result with benchmark-driver:
m
opt_miniruby2: 75222278.0 i/s
clean-miniruby: 73177896.5 i/s - 1.03x slower
opt_miniruby1: 62466783.3 i/s - 1.20x slower
kw
opt_miniruby2: 52044504.4 i/s
opt_miniruby1: 29142025.7 i/s - 1.79x slower
clean-miniruby: 20515235.4 i/s - 2.54x slower
kw k1: 10
opt_miniruby2: 26492219.5 i/s
opt_miniruby1: 25409484.9 i/s - 1.04x slower
clean-miniruby: 20235113.7 i/s - 1.31x slower
kw k1: 10, k2: 20
opt_miniruby1: 24159534.0 i/s
opt_miniruby2: 23470527.5 i/s - 1.03x slower
clean-miniruby: 17822621.5 i/s - 1.36x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67333 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2019-03-22 03:21:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2014-03-12 06:18:50 +04:00
|
|
|
}
|
2019-03-20 22:57:39 +03:00
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
// Called iseq is using ... param
|
|
|
|
// def foo(...) # <- iseq for foo will have "forwardable"
|
|
|
|
//
|
|
|
|
// We want to set the `...` local to the caller's CI
|
|
|
|
// foo(1, 2) # <- the ci for this should end up as `...`
|
|
|
|
//
|
|
|
|
// So hopefully the stack looks like:
|
|
|
|
//
|
|
|
|
// => 1
|
|
|
|
// => 2
|
|
|
|
// => *
|
|
|
|
// => **
|
|
|
|
// => &
|
|
|
|
// => ... # <- points at `foo`s CI
|
|
|
|
// => cref_or_me
|
|
|
|
// => specval
|
|
|
|
// => type
|
|
|
|
//
|
|
|
|
if (ISEQ_BODY(iseq)->param.flags.forwardable) {
|
2024-04-25 01:42:40 +03:00
|
|
|
bool can_fastpath = true;
|
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
if ((vm_ci_flag(ci) & VM_CALL_FORWARDING)) {
|
|
|
|
struct rb_forwarding_call_data * forward_cd = (struct rb_forwarding_call_data *)calling->cd;
|
|
|
|
if (vm_ci_argc(ci) != vm_ci_argc(forward_cd->caller_ci)) {
|
|
|
|
ci = vm_ci_new_runtime(
|
|
|
|
vm_ci_mid(ci),
|
|
|
|
vm_ci_flag(ci),
|
|
|
|
vm_ci_argc(ci),
|
|
|
|
vm_ci_kwarg(ci));
|
|
|
|
} else {
|
|
|
|
ci = forward_cd->caller_ci;
|
|
|
|
}
|
2024-04-25 01:42:40 +03:00
|
|
|
can_fastpath = false;
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
// C functions calling iseqs will stack allocate a CI,
|
|
|
|
// so we need to convert it to heap allocated
|
|
|
|
if (!vm_ci_markable(ci)) {
|
|
|
|
ci = vm_ci_new_runtime(
|
|
|
|
vm_ci_mid(ci),
|
|
|
|
vm_ci_flag(ci),
|
|
|
|
vm_ci_argc(ci),
|
|
|
|
vm_ci_kwarg(ci));
|
2024-04-25 01:42:40 +03:00
|
|
|
can_fastpath = false;
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
argv[param_size - 1] = (VALUE)ci;
|
2024-04-25 01:42:40 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_forwardable, can_fastpath);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-03-20 22:57:39 +03:00
|
|
|
return setup_parameters_complex(ec, iseq, calling, ci, argv, arg_setup_method);
|
2014-03-12 06:18:50 +04:00
|
|
|
}
|
2012-10-15 00:59:21 +04:00
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
static void
|
2024-06-07 03:03:18 +03:00
|
|
|
vm_adjust_stack_forwarding(const struct rb_execution_context_struct *ec, struct rb_control_frame_struct *cfp, int argc, VALUE splat)
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
{
|
|
|
|
// This case is when the caller is using a ... parameter.
|
|
|
|
// For example `bar(...)`. The call info will have VM_CALL_FORWARDING
|
|
|
|
// In this case the caller's caller's CI will be on the stack.
|
|
|
|
//
|
|
|
|
// For example:
|
|
|
|
//
|
|
|
|
// def bar(a, b); a + b; end
|
|
|
|
// def foo(...); bar(...); end
|
|
|
|
// foo(1, 2) # <- this CI will be on the stack when we call `bar(...)`
|
|
|
|
//
|
|
|
|
// Stack layout will be:
|
|
|
|
//
|
|
|
|
// > 1
|
|
|
|
// > 2
|
|
|
|
// > CI for foo(1, 2)
|
|
|
|
// > cref_or_me
|
|
|
|
// > specval
|
|
|
|
// > type
|
|
|
|
// > receiver
|
|
|
|
// > CI for foo(1, 2), via `getlocal ...`
|
|
|
|
// > ( SP points here )
|
|
|
|
const VALUE * lep = VM_CF_LEP(cfp);
|
|
|
|
|
|
|
|
const rb_iseq_t *iseq;
|
|
|
|
|
|
|
|
// If we're in an escaped environment (lambda for example), get the iseq
|
|
|
|
// from the captured env.
|
|
|
|
if (VM_ENV_FLAGS(lep, VM_ENV_FLAG_ESCAPED)) {
|
|
|
|
rb_env_t * env = (rb_env_t *)lep[VM_ENV_DATA_INDEX_ENV];
|
|
|
|
iseq = env->iseq;
|
|
|
|
}
|
|
|
|
else { // Otherwise use the lep to find the caller
|
|
|
|
iseq = rb_vm_search_cf_from_ep(ec, cfp, lep)->iseq;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Our local storage is below the args we need to copy
|
|
|
|
int local_size = ISEQ_BODY(iseq)->local_table_size + argc;
|
|
|
|
|
|
|
|
const VALUE * from = lep - (local_size + VM_ENV_DATA_SIZE - 1); // 2 for EP values
|
|
|
|
VALUE * to = cfp->sp - 1; // clobber the CI
|
|
|
|
|
|
|
|
if (RTEST(splat)) {
|
|
|
|
to -= 1; // clobber the splat array
|
|
|
|
CHECK_VM_STACK_OVERFLOW0(cfp, to, RARRAY_LEN(splat));
|
|
|
|
MEMCPY(to, RARRAY_CONST_PTR(splat), VALUE, RARRAY_LEN(splat));
|
|
|
|
to += RARRAY_LEN(splat);
|
|
|
|
}
|
|
|
|
|
|
|
|
CHECK_VM_STACK_OVERFLOW0(cfp, to, argc);
|
|
|
|
MEMCPY(to, from, VALUE, argc);
|
|
|
|
cfp->sp = to + argc;
|
|
|
|
|
|
|
|
// Stack layout should now be:
|
|
|
|
//
|
|
|
|
// > 1
|
|
|
|
// > 2
|
|
|
|
// > CI for foo(1, 2)
|
|
|
|
// > cref_or_me
|
|
|
|
// > specval
|
|
|
|
// > type
|
|
|
|
// > receiver
|
|
|
|
// > 1
|
|
|
|
// > 2
|
|
|
|
// > ( SP points here )
|
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_iseq_setup(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2007-12-18 15:07:51 +03:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_setup);
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
int param_size = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local_size = ISEQ_BODY(iseq)->local_table_size;
|
|
|
|
|
2024-05-25 00:33:03 +03:00
|
|
|
RUBY_ASSERT(!ISEQ_BODY(iseq)->param.flags.forwardable);
|
|
|
|
|
|
|
|
const int opt_pc = vm_callee_setup_arg(ec, calling, iseq, cfp->sp - calling->argc, param_size, local_size);
|
|
|
|
return vm_call_iseq_setup_2(ec, cfp, calling, opt_pc, param_size, local_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_iseq_fwd_setup(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_setup);
|
|
|
|
|
|
|
|
const struct rb_callcache *cc = calling->cc;
|
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(vm_cc_cme(cc)->def);
|
|
|
|
int param_size = ISEQ_BODY(iseq)->param.size;
|
|
|
|
int local_size = ISEQ_BODY(iseq)->local_table_size;
|
|
|
|
|
|
|
|
RUBY_ASSERT(ISEQ_BODY(iseq)->param.flags.forwardable);
|
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
// Setting up local size and param size
|
2024-05-25 00:33:03 +03:00
|
|
|
local_size = local_size + vm_ci_argc(calling->cd->ci);
|
|
|
|
param_size = param_size + vm_ci_argc(calling->cd->ci);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
2023-02-05 00:46:46 +03:00
|
|
|
const int opt_pc = vm_callee_setup_arg(ec, calling, iseq, cfp->sp - calling->argc, param_size, local_size);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_iseq_setup_2(ec, cfp, calling, opt_pc, param_size, local_size);
|
2007-12-18 15:07:51 +03:00
|
|
|
}
|
|
|
|
|
2015-09-19 20:59:58 +03:00
|
|
|
static inline VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_iseq_setup_2(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling,
|
|
|
|
int opt_pc, int param_size, int local_size)
|
2012-10-17 01:20:11 +04:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (LIKELY(!(vm_ci_flag(ci) & VM_CALL_TAILCALL))) {
|
2020-11-27 09:41:15 +03:00
|
|
|
return vm_call_iseq_setup_normal(ec, cfp, calling, vm_cc_cme(cc), opt_pc, param_size, local_size);
|
2012-10-17 01:20:11 +04:00
|
|
|
}
|
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_iseq_setup_tailcall(ec, cfp, calling, opt_pc);
|
2012-10-17 01:20:11 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-18 10:14:39 +04:00
|
|
|
static inline VALUE
|
2018-09-24 09:09:55 +03:00
|
|
|
vm_call_iseq_setup_normal(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, const rb_callable_method_entry_t *me,
|
2020-11-27 09:41:15 +03:00
|
|
|
int opt_pc, int param_size, int local_size)
|
2007-12-18 15:07:51 +03:00
|
|
|
{
|
2018-08-12 03:30:04 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(me->def);
|
|
|
|
VALUE *argv = cfp->sp - calling->argc;
|
2015-10-23 20:53:35 +03:00
|
|
|
VALUE *sp = argv + param_size;
|
2015-10-11 00:20:59 +03:00
|
|
|
cfp->sp = argv - 1 /* recv */;
|
2007-12-18 15:07:51 +03:00
|
|
|
|
2020-11-27 09:41:15 +03:00
|
|
|
vm_push_frame(ec, iseq, VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, calling->recv,
|
2018-08-12 03:30:04 +03:00
|
|
|
calling->block_handler, (VALUE)me,
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->iseq_encoded + opt_pc, sp,
|
2018-08-11 06:38:38 +03:00
|
|
|
local_size - param_size,
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->stack_max);
|
2012-10-17 01:20:11 +04:00
|
|
|
return Qundef;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2012-10-18 10:14:39 +04:00
|
|
|
static inline VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_iseq_setup_tailcall(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, int opt_pc)
|
2012-10-17 01:20:11 +04:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2015-07-25 00:44:14 +03:00
|
|
|
unsigned int i;
|
2015-09-19 20:59:58 +03:00
|
|
|
VALUE *argv = cfp->sp - calling->argc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_callable_method_entry_t *me = vm_cc_cme(cc);
|
2015-07-22 00:19:02 +03:00
|
|
|
const rb_iseq_t *iseq = def_iseq_ptr(me->def);
|
2012-10-17 01:20:11 +04:00
|
|
|
VALUE *src_argv = argv;
|
|
|
|
VALUE *sp_orig, *sp;
|
2016-08-03 03:28:12 +03:00
|
|
|
VALUE finish_flag = VM_FRAME_FINISHED_P(cfp) ? VM_FRAME_FLAG_FINISH : 0;
|
2007-12-18 15:07:51 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
if (VM_BH_FROM_CFP_P(calling->block_handler, cfp)) {
|
|
|
|
struct rb_captured_block *dst_captured = VM_CFP_TO_CAPTURED_BLOCK(RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp));
|
|
|
|
const struct rb_captured_block *src_captured = VM_BH_TO_CAPT_BLOCK(calling->block_handler);
|
|
|
|
dst_captured->code.val = src_captured->code.val;
|
|
|
|
if (VM_BH_ISEQ_BLOCK_P(calling->block_handler)) {
|
|
|
|
calling->block_handler = VM_BH_FROM_ISEQ_BLOCK(dst_captured);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
calling->block_handler = VM_BH_FROM_IFUNC_BLOCK(dst_captured);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-27 05:49:30 +03:00
|
|
|
vm_pop_frame(ec, cfp, cfp->ep);
|
|
|
|
cfp = ec->cfp;
|
2009-08-15 22:18:07 +04:00
|
|
|
|
2012-10-17 01:20:11 +04:00
|
|
|
sp_orig = sp = cfp->sp;
|
2007-12-18 15:07:51 +03:00
|
|
|
|
2012-10-17 01:20:11 +04:00
|
|
|
/* push self */
|
2015-09-19 20:59:58 +03:00
|
|
|
sp[0] = calling->recv;
|
2012-10-17 01:20:11 +04:00
|
|
|
sp++;
|
2009-08-12 09:55:06 +04:00
|
|
|
|
2012-10-17 01:20:11 +04:00
|
|
|
/* copy arguments */
|
2022-03-23 22:19:48 +03:00
|
|
|
for (i=0; i < ISEQ_BODY(iseq)->param.size; i++) {
|
2012-10-17 01:20:11 +04:00
|
|
|
*sp++ = src_argv[i];
|
|
|
|
}
|
2012-08-02 15:34:19 +04:00
|
|
|
|
2017-10-27 05:49:30 +03:00
|
|
|
vm_push_frame(ec, iseq, VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL | finish_flag,
|
2022-03-23 22:19:48 +03:00
|
|
|
calling->recv, calling->block_handler, (VALUE)me,
|
|
|
|
ISEQ_BODY(iseq)->iseq_encoded + opt_pc, sp,
|
|
|
|
ISEQ_BODY(iseq)->local_table_size - ISEQ_BODY(iseq)->param.size,
|
|
|
|
ISEQ_BODY(iseq)->stack_max);
|
2012-10-17 01:20:11 +04:00
|
|
|
|
|
|
|
cfp->sp = sp_orig;
|
2019-11-28 21:22:24 +03:00
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
return Qundef;
|
2012-08-02 15:34:19 +04:00
|
|
|
}
|
|
|
|
|
2020-11-30 10:18:43 +03:00
|
|
|
static void
|
|
|
|
ractor_unsafe_check(void)
|
|
|
|
{
|
|
|
|
if (!rb_ractor_main_p()) {
|
|
|
|
rb_raise(rb_eRactorUnsafeError, "ractor unsafe method called from not main ractor");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_m2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-08-02 15:34:19 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2022-09-13 10:26:33 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, rb_ary_new4(argc, argv));
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2012-08-02 15:34:19 +04:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_m1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2022-09-13 10:26:33 +03:00
|
|
|
VALUE(*f)(int, const VALUE *, VALUE) = (VALUE(*)(int, const VALUE *, VALUE))func;
|
|
|
|
return (*f)(argc, argv, recv);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_0(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE) = (VALUE(*)(VALUE))func;
|
|
|
|
return (*f)(recv);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_3(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_4(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_5(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_6(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_7(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_8(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_9(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_10(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_11(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_12(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_13(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_14(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2020-11-30 10:18:43 +03:00
|
|
|
ractor_unsafe_check();
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
2020-11-30 10:18:43 +03:00
|
|
|
|
2012-10-19 14:38:30 +04:00
|
|
|
static VALUE
|
2019-02-22 10:25:51 +03:00
|
|
|
call_cfunc_15(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2020-11-30 10:18:43 +03:00
|
|
|
{
|
|
|
|
ractor_unsafe_check();
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13], argv[14]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_m2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
2022-09-13 10:26:33 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, rb_ary_new4(argc, argv));
|
2020-11-30 10:18:43 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_m1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
2022-09-13 10:26:33 +03:00
|
|
|
VALUE(*f)(int, const VALUE *, VALUE) = (VALUE(*)(int, const VALUE *, VALUE))func;
|
|
|
|
return (*f)(argc, argv, recv);
|
2020-11-30 10:18:43 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_0(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE) = (VALUE(*)(VALUE))func;
|
|
|
|
return (*f)(recv);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_1(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE) = (VALUE(*)(VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_2(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_3(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_4(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_5(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_6(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_7(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_8(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_9(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_10(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_11(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_12(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_13(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_14(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
|
|
|
{
|
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
ractor_safe_call_cfunc_15(VALUE recv, int argc, const VALUE *argv, VALUE (*func)(ANYARGS))
|
2012-10-19 14:38:30 +04:00
|
|
|
{
|
2019-02-22 10:25:51 +03:00
|
|
|
VALUE(*f)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) = (VALUE(*)(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE))func;
|
|
|
|
return (*f)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13], argv[14]);
|
2012-10-19 14:38:30 +04:00
|
|
|
}
|
|
|
|
|
2017-08-18 15:44:30 +03:00
|
|
|
static inline int
|
2017-10-27 05:49:30 +03:00
|
|
|
vm_cfp_consistent_p(rb_execution_context_t *ec, const rb_control_frame_t *reg_cfp)
|
2017-08-18 15:44:30 +03:00
|
|
|
{
|
|
|
|
const int ov_flags = RAISED_STACKOVERFLOW;
|
2017-10-27 05:49:30 +03:00
|
|
|
if (LIKELY(reg_cfp == ec->cfp + 1)) return TRUE;
|
2017-11-07 08:22:09 +03:00
|
|
|
if (rb_ec_raised_p(ec, ov_flags)) {
|
|
|
|
rb_ec_raised_reset(ec, ov_flags);
|
2017-08-18 15:44:30 +03:00
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2017-04-11 07:17:45 +03:00
|
|
|
/*
 * Call rb_bug() when vm_cfp_consistent_p(ec, reg_cfp) reports an
 * inconsistency; otherwise evaluate to (void)0.
 *
 * `func` must be a string literal: it is concatenated in front of the
 * message.  `ec` and `reg_cfp` are picked up from the expansion site.
 */
#define CHECK_CFP_CONSISTENCY(func) \
    (LIKELY(vm_cfp_consistent_p(ec, reg_cfp)) \
     ? (void)0 \
     : rb_bug(func ": cfp consistency error (%p, %p)", \
              (void *)reg_cfp, (void *)(ec->cfp+1)))
|
2017-04-11 07:17:45 +03:00
|
|
|
|
2013-09-09 09:17:19 +04:00
|
|
|
static inline
|
|
|
|
const rb_method_cfunc_t *
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, suppose there are classes C and D, both of which include M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
vm_method_cfunc_entry(const rb_callable_method_entry_t *me)
|
2013-09-09 09:17:19 +04:00
|
|
|
{
|
|
|
|
#if VM_DEBUG_VERIFY_METHOD_CACHE
|
|
|
|
switch (me->def->type) {
|
|
|
|
case VM_METHOD_TYPE_CFUNC:
|
|
|
|
case VM_METHOD_TYPE_NOTIMPLEMENTED:
|
|
|
|
break;
|
|
|
|
# define METHOD_BUG(t) case VM_METHOD_TYPE_##t: rb_bug("wrong method type: " #t)
|
|
|
|
METHOD_BUG(ISEQ);
|
|
|
|
METHOD_BUG(ATTRSET);
|
|
|
|
METHOD_BUG(IVAR);
|
|
|
|
METHOD_BUG(BMETHOD);
|
|
|
|
METHOD_BUG(ZSUPER);
|
|
|
|
METHOD_BUG(UNDEF);
|
|
|
|
METHOD_BUG(OPTIMIZED);
|
|
|
|
METHOD_BUG(MISSING);
|
|
|
|
METHOD_BUG(REFINED);
|
2015-05-30 21:45:28 +03:00
|
|
|
METHOD_BUG(ALIAS);
|
2013-09-09 09:17:19 +04:00
|
|
|
# undef METHOD_BUG
|
|
|
|
default:
|
|
|
|
rb_bug("wrong method type: %d", me->def->type);
|
|
|
|
}
|
|
|
|
#endif
|
2019-05-31 09:58:50 +03:00
|
|
|
return UNALIGNED_MEMBER_PTR(me->def, body.cfunc);
|
2013-09-09 09:17:19 +04:00
|
|
|
}
|
|
|
|
|
2024-08-03 03:53:13 +03:00
|
|
|
static VALUE
|
2023-01-12 17:56:29 +03:00
|
|
|
vm_call_cfunc_with_frame_(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling,
|
|
|
|
int argc, VALUE *argv, VALUE *stack_bottom)
|
2012-08-08 11:52:19 +04:00
|
|
|
{
|
2020-04-14 06:32:59 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_cfunc_with_frame);
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
* vm_core.h, vm_insnhelper.c, vm_eval.c (OPT_CALL_CFUNC_WITHOUT_FRAME):
add a new otpimization and its macro `OPT_CALL_CFUNC_WITHOUT_FRAME'.
This optimization makes all cfunc method calls `frameless', which
is fster than ordinal cfunc method call.
If `frame' is needed (for example, it calls another method with
`rb_funcall()'), then build a frame. In other words, this
optimization delays frame building.
However, to delay the frame building, we need additional overheads:
(1) Store the last call information.
(2) Check the delayed frame buidling before the frame is needed.
(3) Overhead to build a delayed frame.
rb_thread_t::passed_ci is storage of delayed cfunc call information.
(1) is lightweight because it is only 1 assignment to `passed_ci'.
To achieve (2), we modify GET_THREAD() to check `passed_ci' every
time. It causes 10% overhead on my envrionment.
This optimization only works for cfunc methods which do not need
their `frame'.
After evaluation on my environment, this optimization does not
effective every time. Because of this evaluation results, this
optimization is disabled at default.
* vm_insnhelper.c, vm.c: add VM_PROFILE* macros to measure behaviour
of VM internals. I will extend this feature.
* vm_method.c, method.h: change parameters of the `invoker' function.
Receive `func' pointer as the first parameter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-23 08:22:31 +04:00
|
|
|
VALUE val;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_callable_method_entry_t *me = vm_cc_cme(cc);
|
2013-09-09 09:17:19 +04:00
|
|
|
const rb_method_cfunc_t *cfunc = vm_method_cfunc_entry(me);
|
2012-11-13 13:48:08 +04:00
|
|
|
|
2015-09-19 20:59:58 +03:00
|
|
|
VALUE recv = calling->recv;
|
2016-07-28 14:02:30 +03:00
|
|
|
VALUE block_handler = calling->block_handler;
|
2019-09-03 21:32:02 +03:00
|
|
|
VALUE frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
|
2019-09-01 07:51:02 +03:00
|
|
|
|
2019-09-05 12:34:07 +03:00
|
|
|
if (UNLIKELY(calling->kw_splat)) {
|
2019-09-03 21:32:02 +03:00
|
|
|
frame_type |= VM_FRAME_FLAG_CFRAME_KW;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2023-01-12 17:56:29 +03:00
|
|
|
VM_ASSERT(reg_cfp == ec->cfp);
|
|
|
|
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_CMETHOD_ENTRY_HOOK(ec, me->owner, me->def->original_id);
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_CALL, recv, me->def->original_id, vm_ci_mid(ci), me->owner, Qundef);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2019-09-03 21:32:02 +03:00
|
|
|
vm_push_frame(ec, NULL, frame_type, recv,
|
2016-07-28 14:02:30 +03:00
|
|
|
block_handler, (VALUE)me,
|
2017-10-27 05:49:30 +03:00
|
|
|
0, ec->cfp->sp, 0, 0);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2023-01-12 17:56:29 +03:00
|
|
|
int len = cfunc->argc;
|
2012-11-13 13:48:08 +04:00
|
|
|
if (len >= 0) rb_check_arity(argc, len, len);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2023-01-12 17:56:29 +03:00
|
|
|
reg_cfp->sp = stack_bottom;
|
|
|
|
val = (*cfunc->invoker)(recv, argc, argv, cfunc->func);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2017-04-11 07:17:45 +03:00
|
|
|
CHECK_CFP_CONSISTENCY("vm_call_cfunc");
|
2012-08-08 11:52:19 +04:00
|
|
|
|
2017-10-27 05:49:30 +03:00
|
|
|
rb_vm_pop_frame(ec);
|
2012-08-23 11:22:40 +04:00
|
|
|
|
2023-01-12 17:56:29 +03:00
|
|
|
VM_ASSERT(ec->cfp->sp == stack_bottom);
|
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, recv, me->def->original_id, vm_ci_mid(ci), me->owner, val);
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
|
* vm_core.h, vm_insnhelper.c, vm_eval.c (OPT_CALL_CFUNC_WITHOUT_FRAME):
add a new otpimization and its macro `OPT_CALL_CFUNC_WITHOUT_FRAME'.
This optimization makes all cfunc method calls `frameless', which
is fster than ordinal cfunc method call.
If `frame' is needed (for example, it calls another method with
`rb_funcall()'), then build a frame. In other words, this
optimization delays frame building.
However, to delay the frame building, we need additional overheads:
(1) Store the last call information.
(2) Check the delayed frame buidling before the frame is needed.
(3) Overhead to build a delayed frame.
rb_thread_t::passed_ci is storage of delayed cfunc call information.
(1) is lightweight because it is only 1 assignment to `passed_ci'.
To achieve (2), we modify GET_THREAD() to check `passed_ci' every
time. It causes 10% overhead on my envrionment.
This optimization only works for cfunc methods which do not need
their `frame'.
After evaluation on my environment, this optimization does not
effective every time. Because of this evaluation results, this
optimization is disabled at default.
* vm_insnhelper.c, vm.c: add VM_PROFILE* macros to measure behaviour
of VM internals. I will extend this feature.
* vm_method.c, method.h: change parameters of the `invoker' function.
Receive `func' pointer as the first parameter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-23 08:22:31 +04:00
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2024-02-23 22:08:09 +03:00
|
|
|
// Push a C method frame for a given cme. This is called when JIT code skipped
|
|
|
|
// pushing a frame but the C method reached a point where a frame is needed.
|
|
|
|
void
|
|
|
|
rb_vm_push_cfunc_frame(const rb_callable_method_entry_t *cme, int recv_idx)
|
|
|
|
{
|
|
|
|
VM_ASSERT(cme->def->type == VM_METHOD_TYPE_CFUNC);
|
|
|
|
rb_execution_context_t *ec = GET_EC();
|
|
|
|
VALUE *sp = ec->cfp->sp;
|
|
|
|
VALUE recv = *(sp - recv_idx - 1);
|
|
|
|
VALUE frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
|
|
|
|
VALUE block_handler = VM_BLOCK_HANDLER_NONE;
|
|
|
|
#if VM_CHECK_MODE > 0
|
|
|
|
// Clean up the stack canary since we're about to satisfy the "leaf or lazy push" assumption
|
|
|
|
*(GET_EC()->cfp->sp) = Qfalse;
|
|
|
|
#endif
|
|
|
|
vm_push_frame(ec, NULL, frame_type, recv, block_handler, (VALUE)cme, 0, ec->cfp->sp, 0, 0);
|
|
|
|
}
|
|
|
|
|
2023-01-13 11:52:59 +03:00
|
|
|
// Tell whether the call site passes a splat, keywords, or a keyword splat.
// If true, cc->call needs to include `CALLER_SETUP_ARG` (i.e. it can't be
// skipped in a fastpath).
bool
rb_splat_or_kwargs_p(const struct rb_callinfo *restrict ci)
{
    if (IS_ARGS_SPLAT(ci)) {
        return true;
    }
    return IS_ARGS_KW_OR_KW_SPLAT(ci) ? true : false;
}
|
|
|
|
|
2023-01-12 17:56:29 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_cfunc_with_frame(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
int argc = calling->argc;
|
|
|
|
VALUE *stack_bottom = reg_cfp->sp - argc - 1;
|
|
|
|
VALUE *argv = &stack_bottom[1];
|
|
|
|
|
|
|
|
return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, argv, stack_bottom);
|
|
|
|
}
|
|
|
|
|
* vm_core.h, vm_insnhelper.c, vm_eval.c (OPT_CALL_CFUNC_WITHOUT_FRAME):
add a new optimization and its macro `OPT_CALL_CFUNC_WITHOUT_FRAME'.
This optimization makes all cfunc method calls `frameless', which
is faster than an ordinary cfunc method call.
If a `frame' is needed (for example, it calls another method with
`rb_funcall()'), then build a frame. In other words, this
optimization delays frame building.
However, to delay the frame building, we need additional overheads:
(1) Store the last call information.
(2) Check the delayed frame building before the frame is needed.
(3) Overhead to build a delayed frame.
rb_thread_t::passed_ci is storage of delayed cfunc call information.
(1) is lightweight because it is only 1 assignment to `passed_ci'.
To achieve (2), we modify GET_THREAD() to check `passed_ci' every
time. It causes 10% overhead on my environment.
This optimization only works for cfunc methods which do not need
their `frame'.
After evaluation on my environment, this optimization is not
effective every time. Because of these evaluation results, this
optimization is disabled by default.
* vm_insnhelper.c, vm.c: add VM_PROFILE* macros to measure behaviour
of VM internals. I will extend this feature.
* vm_method.c, method.h: change parameters of the `invoker' function.
Receive `func' pointer as the first parameter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-23 08:22:31 +04:00
|
|
|
static VALUE
|
2023-04-01 19:19:35 +03:00
|
|
|
vm_call_cfunc_other(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
* vm_core.h, vm_insnhelper.c, vm_eval.c (OPT_CALL_CFUNC_WITHOUT_FRAME):
add a new otpimization and its macro `OPT_CALL_CFUNC_WITHOUT_FRAME'.
This optimization makes all cfunc method calls `frameless', which
is fster than ordinal cfunc method call.
If `frame' is needed (for example, it calls another method with
`rb_funcall()'), then build a frame. In other words, this
optimization delays frame building.
However, to delay the frame building, we need additional overheads:
(1) Store the last call information.
(2) Check the delayed frame buidling before the frame is needed.
(3) Overhead to build a delayed frame.
rb_thread_t::passed_ci is storage of delayed cfunc call information.
(1) is lightweight because it is only 1 assignment to `passed_ci'.
To achieve (2), we modify GET_THREAD() to check `passed_ci' every
time. It causes 10% overhead on my envrionment.
This optimization only works for cfunc methods which do not need
their `frame'.
After evaluation on my environment, this optimization does not
effective every time. Because of this evaluation results, this
optimization is disabled at default.
* vm_insnhelper.c, vm.c: add VM_PROFILE* macros to measure behaviour
of VM internals. I will extend this feature.
* vm_method.c, method.h: change parameters of the `invoker' function.
Receive `func' pointer as the first parameter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-23 08:22:31 +04:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2023-04-01 19:19:35 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_cfunc_other);
|
2019-03-22 10:57:26 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV_KEEP_KWSPLAT);
|
2023-01-12 17:56:29 +03:00
|
|
|
VALUE argv_ary;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(argv_ary = calling->heap_argv)) {
|
2023-01-13 11:52:59 +03:00
|
|
|
VM_ASSERT(!IS_ARGS_KEYWORD(ci));
|
2023-01-12 17:56:29 +03:00
|
|
|
int argc = RARRAY_LENINT(argv_ary);
|
2023-01-13 11:52:59 +03:00
|
|
|
VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary);
|
2023-01-12 17:56:29 +03:00
|
|
|
VALUE *stack_bottom = reg_cfp->sp - 2;
|
|
|
|
|
|
|
|
VM_ASSERT(calling->argc == 1);
|
|
|
|
VM_ASSERT(RB_TYPE_P(argv_ary, T_ARRAY));
|
|
|
|
VM_ASSERT(RBASIC_CLASS(argv_ary) == 0); // hidden ary
|
|
|
|
|
|
|
|
return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, argv, stack_bottom);
|
|
|
|
}
|
|
|
|
else {
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_cfunc_with_frame, !rb_splat_or_kwargs_p(ci) && !calling->kw_splat && !(vm_ci_flag(ci) & VM_CALL_FORWARDING));
|
2023-01-12 17:56:29 +03:00
|
|
|
|
|
|
|
return vm_call_cfunc_with_frame(ec, reg_cfp, calling);
|
|
|
|
}
|
* vm_core.h, vm_insnhelper.c, vm_eval.c (OPT_CALL_CFUNC_WITHOUT_FRAME):
add a new otpimization and its macro `OPT_CALL_CFUNC_WITHOUT_FRAME'.
This optimization makes all cfunc method calls `frameless', which
is fster than ordinal cfunc method call.
If `frame' is needed (for example, it calls another method with
`rb_funcall()'), then build a frame. In other words, this
optimization delays frame building.
However, to delay the frame building, we need additional overheads:
(1) Store the last call information.
(2) Check the delayed frame buidling before the frame is needed.
(3) Overhead to build a delayed frame.
rb_thread_t::passed_ci is storage of delayed cfunc call information.
(1) is lightweight because it is only 1 assignment to `passed_ci'.
To achieve (2), we modify GET_THREAD() to check `passed_ci' every
time. It causes 10% overhead on my envrionment.
This optimization only works for cfunc methods which do not need
their `frame'.
After evaluation on my environment, this optimization does not
effective every time. Because of this evaluation results, this
optimization is disabled at default.
* vm_insnhelper.c, vm.c: add VM_PROFILE* macros to measure behaviour
of VM internals. I will extend this feature.
* vm_method.c, method.h: change parameters of the `invoker' function.
Receive `func' pointer as the first parameter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-23 08:22:31 +04:00
|
|
|
}
|
|
|
|
|
2023-04-01 19:19:35 +03:00
|
|
|
static inline VALUE
|
|
|
|
vm_call_cfunc_array_argv(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int stack_offset, int argc_offset)
|
|
|
|
{
|
|
|
|
VALUE argv_ary = reg_cfp->sp[-1 - stack_offset];
|
|
|
|
int argc = RARRAY_LENINT(argv_ary) - argc_offset;
|
|
|
|
|
|
|
|
if (UNLIKELY(argc > VM_ARGC_STACK_MAX)) {
|
|
|
|
return vm_call_cfunc_other(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary);
|
|
|
|
calling->kw_splat = 0;
|
|
|
|
int i;
|
|
|
|
VALUE *stack_bottom = reg_cfp->sp - 2 - stack_offset;
|
|
|
|
VALUE *sp = stack_bottom;
|
|
|
|
CHECK_VM_STACK_OVERFLOW(reg_cfp, argc);
|
|
|
|
for(i = 0; i < argc; i++) {
|
|
|
|
*++sp = argv[i];
|
|
|
|
}
|
|
|
|
reg_cfp->sp = sp+1;
|
|
|
|
|
|
|
|
return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, stack_bottom+1, stack_bottom);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
|
|
|
vm_call_cfunc_only_splat(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat);
|
|
|
|
VALUE argv_ary = reg_cfp->sp[-1];
|
|
|
|
int argc = RARRAY_LENINT(argv_ary);
|
|
|
|
VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary);
|
|
|
|
VALUE last_hash;
|
|
|
|
int argc_offset = 0;
|
|
|
|
|
|
|
|
if (UNLIKELY(argc > 0 &&
|
|
|
|
RB_TYPE_P((last_hash = argv[argc-1]), T_HASH) &&
|
|
|
|
(((struct RHash *)last_hash)->basic.flags & RHASH_PASS_AS_KEYWORDS))) {
|
|
|
|
if (!RHASH_EMPTY_P(last_hash)) {
|
|
|
|
return vm_call_cfunc_other(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
argc_offset++;
|
|
|
|
}
|
|
|
|
return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 0, argc_offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
|
|
|
vm_call_cfunc_only_splat_kw(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat_kw);
|
|
|
|
VALUE keyword_hash = reg_cfp->sp[-1];
|
|
|
|
|
2024-01-14 22:41:02 +03:00
|
|
|
if (keyword_hash == Qnil || (RB_TYPE_P(keyword_hash, T_HASH) && RHASH_EMPTY_P(keyword_hash))) {
|
2023-04-01 19:19:35 +03:00
|
|
|
return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 1, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vm_call_cfunc_other(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2023-04-01 19:19:35 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_cfunc);
|
|
|
|
|
2024-07-02 20:31:15 +03:00
|
|
|
if (IS_ARGS_SPLAT(ci) && !(vm_ci_flag(ci) & VM_CALL_FORWARDING)) {
|
2023-04-01 19:19:35 +03:00
|
|
|
if (!IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 1) {
|
|
|
|
// f(*a)
|
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat, TRUE);
|
|
|
|
return vm_call_cfunc_only_splat(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
if (IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 2) {
|
|
|
|
// f(*a, **kw)
|
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat_kw, TRUE);
|
|
|
|
return vm_call_cfunc_only_splat_kw(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_cfunc_other, TRUE);
|
|
|
|
return vm_call_cfunc_other(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_ivar(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_ivar);
|
2012-10-14 23:58:59 +04:00
|
|
|
cfp->sp -= 1;
|
2023-02-28 15:44:57 +03:00
|
|
|
VALUE ivar = vm_getivar(calling->recv, vm_cc_cme(cc)->def->body.attr.id, NULL, NULL, cc, TRUE, Qnil);
|
2022-10-03 18:14:32 +03:00
|
|
|
return ivar;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2022-10-03 18:14:32 +03:00
|
|
|
vm_call_attrset_direct(rb_execution_context_t *ec, rb_control_frame_t *cfp, const struct rb_callcache *cc, VALUE obj)
|
2012-08-23 11:22:40 +04:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_attrset);
|
2016-04-23 15:17:36 +03:00
|
|
|
VALUE val = *(cfp->sp - 1);
|
2012-10-14 23:58:59 +04:00
|
|
|
cfp->sp -= 2;
|
2022-10-03 18:14:32 +03:00
|
|
|
attr_index_t index = vm_cc_attr_index(cc);
|
|
|
|
shape_id_t dest_shape_id = vm_cc_attr_index_dest_shape_id(cc);
|
|
|
|
ID id = vm_cc_cme(cc)->def->body.attr.id;
|
2024-05-30 15:55:32 +03:00
|
|
|
rb_check_frozen(obj);
|
2022-10-03 20:52:40 +03:00
|
|
|
VALUE res = vm_setivar(obj, id, val, dest_shape_id, index);
|
2022-11-15 07:24:08 +03:00
|
|
|
if (UNDEF_P(res)) {
|
2022-10-03 18:14:32 +03:00
|
|
|
switch (BUILTIN_TYPE(obj)) {
|
2022-10-12 12:27:23 +03:00
|
|
|
case T_OBJECT:
|
|
|
|
case T_CLASS:
|
|
|
|
case T_MODULE:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
res = vm_setivar_default(obj, id, val, dest_shape_id, index);
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(res)) {
|
2022-10-12 12:27:23 +03:00
|
|
|
return res;
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
res = vm_setivar_slowpath_attr(obj, id, val, cc);
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_attrset(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
return vm_call_attrset_direct(ec, cfp, calling->cc, calling->recv);
|
2012-08-23 11:22:40 +04:00
|
|
|
}
|
|
|
|
|
2012-10-18 10:14:39 +04:00
|
|
|
static inline VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_bmethod_body(rb_execution_context_t *ec, struct rb_calling_info *calling, const VALUE *argv)
|
2012-08-23 11:22:40 +04:00
|
|
|
{
|
2012-10-14 23:58:59 +04:00
|
|
|
rb_proc_t *proc;
|
|
|
|
VALUE val;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2020-09-25 12:31:04 +03:00
|
|
|
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
|
2020-10-29 18:32:53 +03:00
|
|
|
VALUE procv = cme->def->body.bmethod.proc;
|
2020-09-25 12:31:04 +03:00
|
|
|
|
2020-10-29 18:32:53 +03:00
|
|
|
if (!RB_OBJ_SHAREABLE_P(procv) &&
|
2020-12-19 19:44:41 +03:00
|
|
|
cme->def->body.bmethod.defined_ractor != rb_ractor_self(rb_ec_ractor_ptr(ec))) {
|
2021-08-24 15:00:00 +03:00
|
|
|
rb_raise(rb_eRuntimeError, "defined with an un-shareable Proc in a different Ractor");
|
2020-09-25 12:31:04 +03:00
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
|
|
|
/* control block frame */
|
2020-10-29 18:32:53 +03:00
|
|
|
GetProcPtr(procv, proc);
|
2024-08-08 01:29:33 +03:00
|
|
|
val = vm_invoke_bmethod(ec, proc, calling->recv, CALLING_ARGC(calling), argv, calling->kw_splat, calling->block_handler, vm_cc_cme(cc));
|
2012-10-14 23:58:59 +04:00
|
|
|
|
|
|
|
return val;
|
2012-08-23 11:22:40 +04:00
|
|
|
}
|
|
|
|
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
The vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
/* Forward declaration so that vm_call_iseq_bmethod can call this helper before its definition. */
static int vm_callee_setup_block_arg(rb_execution_context_t *ec, struct rb_calling_info *calling, const struct rb_callinfo *ci, const rb_iseq_t *iseq, VALUE *argv, const enum arg_setup_type arg_setup_type);
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2012-08-23 11:22:40 +04:00
|
|
|
{
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_iseq_bmethod);
|
|
|
|
|
|
|
|
const struct rb_callcache *cc = calling->cc;
|
|
|
|
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
|
|
|
|
VALUE procv = cme->def->body.bmethod.proc;
|
|
|
|
|
|
|
|
if (!RB_OBJ_SHAREABLE_P(procv) &&
|
|
|
|
cme->def->body.bmethod.defined_ractor != rb_ractor_self(rb_ec_ractor_ptr(ec))) {
|
|
|
|
rb_raise(rb_eRuntimeError, "defined with an un-shareable Proc in a different Ractor");
|
|
|
|
}
|
|
|
|
|
|
|
|
rb_proc_t *proc;
|
|
|
|
GetProcPtr(procv, proc);
|
|
|
|
const struct rb_block *block = &proc->block;
|
|
|
|
|
|
|
|
while (vm_block_type(block) == block_type_proc) {
|
|
|
|
block = vm_proc_block(block->as.proc);
|
|
|
|
}
|
|
|
|
VM_ASSERT(vm_block_type(block) == block_type_iseq);
|
|
|
|
|
|
|
|
const struct rb_captured_block *captured = &block->as.captured;
|
|
|
|
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
|
2023-07-17 20:57:58 +03:00
|
|
|
VALUE * const argv = cfp->sp - calling->argc;
|
|
|
|
const int arg_size = ISEQ_BODY(iseq)->param.size;
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
|
2023-07-17 20:57:58 +03:00
|
|
|
int opt_pc;
|
2023-07-31 10:04:16 +03:00
|
|
|
if (vm_ci_flag(calling->cd->ci) & VM_CALL_ARGS_SIMPLE) {
|
|
|
|
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->cd->ci, iseq, argv, arg_setup_method);
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
}
|
|
|
|
else {
|
2023-07-31 10:04:16 +03:00
|
|
|
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->cd->ci, argv, arg_setup_method);
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
}
|
|
|
|
|
2023-07-17 20:57:58 +03:00
|
|
|
cfp->sp = argv - 1; // -1 for the receiver
|
|
|
|
|
|
|
|
vm_push_frame(ec, iseq,
|
|
|
|
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA,
|
|
|
|
calling->recv,
|
|
|
|
VM_GUARDED_PREV_EP(captured->ep),
|
|
|
|
(VALUE)cme,
|
|
|
|
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
|
|
|
|
argv + arg_size,
|
|
|
|
ISEQ_BODY(iseq)->local_table_size - arg_size,
|
|
|
|
ISEQ_BODY(iseq)->stack_max);
|
|
|
|
|
|
|
|
return Qundef;
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
Th vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_noniseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_noniseq_bmethod);
|
2019-03-22 10:57:26 +03:00
|
|
|
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is refered by `checkkeyword' instruction described bellow.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is cimpiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
VALUE *argv;
|
2015-09-19 20:59:58 +03:00
|
|
|
int argc;
|
2023-07-31 10:04:16 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, calling->cd->ci, ALLOW_HEAP_ARGV);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(calling->heap_argv)) {
|
|
|
|
argv = RARRAY_PTR(calling->heap_argv);
|
|
|
|
cfp->sp -= 2;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
argc = calling->argc;
|
|
|
|
argv = ALLOCA_N(VALUE, argc);
|
|
|
|
MEMCPY(argv, cfp->sp - argc, VALUE, argc);
|
|
|
|
cfp->sp += - argc - 1;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_bmethod_body(ec, calling, argv);
|
2012-08-23 11:22:40 +04:00
|
|
|
}
|
2012-09-28 08:05:36 +04:00
|
|
|
|
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
The vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
2023-03-24 00:39:31 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_bmethod);
|
|
|
|
|
|
|
|
const struct rb_callcache *cc = calling->cc;
|
|
|
|
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
|
|
|
|
VALUE procv = cme->def->body.bmethod.proc;
|
|
|
|
rb_proc_t *proc;
|
|
|
|
GetProcPtr(procv, proc);
|
|
|
|
const struct rb_block *block = &proc->block;
|
|
|
|
|
|
|
|
while (vm_block_type(block) == block_type_proc) {
|
|
|
|
block = vm_proc_block(block->as.proc);
|
|
|
|
}
|
|
|
|
if (vm_block_type(block) == block_type_iseq) {
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_bmethod, TRUE);
|
|
|
|
return vm_call_iseq_bmethod(ec, cfp, calling);
|
|
|
|
}
|
|
|
|
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_noniseq_bmethod, TRUE);
|
|
|
|
return vm_call_noniseq_bmethod(ec, cfp, calling);
|
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
VALUE
|
2020-08-27 18:37:03 +03:00
|
|
|
rb_find_defined_class_by_owner(VALUE current_class, VALUE target_owner)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
|
|
|
VALUE klass = current_class;
|
|
|
|
|
|
|
|
/* for prepended Module, then start from cover class */
|
2020-04-05 22:10:42 +03:00
|
|
|
if (RB_TYPE_P(klass, T_ICLASS) && FL_TEST(klass, RICLASS_IS_ORIGIN) &&
|
|
|
|
RB_TYPE_P(RBASIC_CLASS(klass), T_CLASS)) {
|
|
|
|
klass = RBASIC_CLASS(klass);
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
while (RTEST(klass)) {
|
|
|
|
VALUE owner = RB_TYPE_P(klass, T_ICLASS) ? RBASIC_CLASS(klass) : klass;
|
|
|
|
if (owner == target_owner) {
|
|
|
|
return klass;
|
|
|
|
}
|
|
|
|
klass = RCLASS_SUPER(klass);
|
|
|
|
}
|
|
|
|
|
|
|
|
return current_class; /* maybe module function */
|
|
|
|
}
|
|
|
|
|
|
|
|
static const rb_callable_method_entry_t *
|
|
|
|
aliased_callable_method_entry(const rb_callable_method_entry_t *me)
|
|
|
|
{
|
|
|
|
const rb_method_entry_t *orig_me = me->def->body.alias.original_me;
|
|
|
|
const rb_callable_method_entry_t *cme;
|
|
|
|
|
|
|
|
if (orig_me->defined_class == 0) {
|
2020-08-27 18:37:03 +03:00
|
|
|
VALUE defined_class = rb_find_defined_class_by_owner(me->defined_class, orig_me->owner);
|
2024-10-31 16:12:16 +03:00
|
|
|
VM_ASSERT_TYPE(orig_me->owner, T_MODULE);
|
2020-01-08 10:14:01 +03:00
|
|
|
cme = rb_method_entry_complement_defined_class(orig_me, me->called_id, defined_class);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2023-09-20 19:26:31 +03:00
|
|
|
if (me->def->reference_count == 1) {
|
2020-01-08 10:14:01 +03:00
|
|
|
RB_OBJ_WRITE(me, &me->def->body.alias.original_me, cme);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_method_definition_t *def =
|
|
|
|
rb_method_definition_create(VM_METHOD_TYPE_ALIAS, me->def->original_id);
|
|
|
|
rb_method_definition_set((rb_method_entry_t *)me, def, (void *)cme);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cme = (const rb_callable_method_entry_t *)orig_me;
|
|
|
|
}
|
|
|
|
|
|
|
|
VM_ASSERT(callable_method_entry_p(cme));
|
|
|
|
return cme;
|
|
|
|
}
|
|
|
|
|
2021-05-05 23:06:19 +03:00
|
|
|
/*
 * Non-static wrapper around aliased_callable_method_entry(): returns the
 * callable method entry that the alias entry `me` resolves to.
 */
const rb_callable_method_entry_t *
rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me)
{
    const rb_callable_method_entry_t *resolved = aliased_callable_method_entry(me);

    return resolved;
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_alias(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
calling->cc = &VM_CC_ON_STACK(Qundef,
|
|
|
|
vm_call_general,
|
2022-10-03 18:14:32 +03:00
|
|
|
{{0}},
|
2020-12-14 23:40:38 +03:00
|
|
|
aliased_callable_method_entry(vm_cc_cme(calling->cc)));
|
2020-06-04 07:03:13 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
|
2015-06-03 13:42:18 +03:00
|
|
|
static enum method_missing_reason
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
ci_missing_reason(const struct rb_callinfo *ci)
|
2015-02-05 06:31:07 +03:00
|
|
|
{
|
2015-06-03 13:42:18 +03:00
|
|
|
enum method_missing_reason stat = MISSING_NOENTRY;
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (vm_ci_flag(ci) & VM_CALL_VCALL) stat |= MISSING_VCALL;
|
|
|
|
if (vm_ci_flag(ci) & VM_CALL_FCALL) stat |= MISSING_FCALL;
|
|
|
|
if (vm_ci_flag(ci) & VM_CALL_SUPER) stat |= MISSING_SUPER;
|
2015-02-05 06:31:07 +03:00
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
2022-08-10 23:02:19 +03:00
|
|
|
/* Forward declaration: vm_call_symbol() below calls this for private and protected methods. */
static VALUE vm_call_method_missing(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling);
|
|
|
|
|
2020-05-28 06:33:53 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_symbol(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
2022-08-10 23:02:19 +03:00
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE symbol, int flags)
|
2020-05-28 06:33:53 +03:00
|
|
|
{
|
|
|
|
ASSUME(calling->argc >= 0);
|
2020-05-28 06:41:48 +03:00
|
|
|
|
2020-05-28 06:33:53 +03:00
|
|
|
enum method_missing_reason missing_reason = MISSING_NOENTRY;
|
|
|
|
int argc = calling->argc;
|
|
|
|
VALUE recv = calling->recv;
|
|
|
|
VALUE klass = CLASS_OF(recv);
|
|
|
|
ID mid = rb_check_id(&symbol);
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
flags |= VM_CALL_OPT_SEND;
|
2020-05-28 06:41:48 +03:00
|
|
|
|
2020-05-28 06:33:53 +03:00
|
|
|
if (UNLIKELY(! mid)) {
|
|
|
|
mid = idMethodMissing;
|
|
|
|
missing_reason = ci_missing_reason(ci);
|
|
|
|
ec->method_missing_reason = missing_reason;
|
2020-05-28 06:41:48 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
VALUE argv_ary;
|
|
|
|
if (UNLIKELY(argv_ary = calling->heap_argv)) {
|
|
|
|
if (rb_method_basic_definition_p(klass, idMethodMissing)) {
|
|
|
|
rb_ary_unshift(argv_ary, symbol);
|
|
|
|
|
|
|
|
/* Inadvertent symbol creation shall be forbidden, see [Feature #5112] */
|
|
|
|
int priv = vm_ci_flag(ci) & (VM_CALL_FCALL | VM_CALL_VCALL);
|
|
|
|
VALUE exc = rb_make_no_method_exception(
|
|
|
|
rb_eNoMethodError, 0, recv, RARRAY_LENINT(argv_ary), RARRAY_CONST_PTR(argv_ary), priv);
|
|
|
|
|
|
|
|
rb_exc_raise(exc);
|
|
|
|
}
|
|
|
|
rb_ary_unshift(argv_ary, rb_str_intern(symbol));
|
2020-05-28 06:33:53 +03:00
|
|
|
}
|
|
|
|
else {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
/* E.g. when argc == 2
|
|
|
|
*
|
|
|
|
* | | | | TOPN
|
|
|
|
* | | +------+
|
|
|
|
* | | +---> | arg1 | 0
|
|
|
|
* +------+ | +------+
|
|
|
|
* | arg1 | -+ +-> | arg0 | 1
|
|
|
|
* +------+ | +------+
|
|
|
|
* | arg0 | ---+ | sym | 2
|
|
|
|
* +------+ +------+
|
|
|
|
* | recv | | recv | 3
|
|
|
|
* --+------+--------+------+------
|
|
|
|
*/
|
|
|
|
int i = argc;
|
|
|
|
CHECK_VM_STACK_OVERFLOW(reg_cfp, 1);
|
|
|
|
INC_SP(1);
|
|
|
|
MEMMOVE(&TOPN(i - 1), &TOPN(i), VALUE, i);
|
|
|
|
argc = ++calling->argc;
|
|
|
|
|
|
|
|
if (rb_method_basic_definition_p(klass, idMethodMissing)) {
|
|
|
|
/* Inadvertent symbol creation shall be forbidden, see [Feature #5112] */
|
|
|
|
TOPN(i) = symbol;
|
|
|
|
int priv = vm_ci_flag(ci) & (VM_CALL_FCALL | VM_CALL_VCALL);
|
|
|
|
const VALUE *argv = STACK_ADDR_FROM_TOP(argc);
|
|
|
|
VALUE exc = rb_make_no_method_exception(
|
|
|
|
rb_eNoMethodError, 0, recv, argc, argv, priv);
|
|
|
|
|
|
|
|
rb_exc_raise(exc);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
TOPN(i) = rb_str_intern(symbol);
|
|
|
|
}
|
2020-05-28 06:33:53 +03:00
|
|
|
}
|
|
|
|
}
|
2020-05-28 06:41:48 +03:00
|
|
|
|
2024-06-20 17:56:03 +03:00
|
|
|
struct rb_forwarding_call_data new_fcd = {
|
|
|
|
.cd = {
|
|
|
|
.ci = &VM_CI_ON_STACK(mid, flags, argc, vm_ci_kwarg(ci)),
|
|
|
|
.cc = NULL,
|
|
|
|
},
|
|
|
|
.caller_ci = NULL,
|
2023-07-31 10:04:16 +03:00
|
|
|
};
|
2024-06-20 17:56:03 +03:00
|
|
|
|
|
|
|
if (!(vm_ci_flag(ci) & VM_CALL_FORWARDING)) {
|
|
|
|
calling->cd = &new_fcd.cd;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
const struct rb_callinfo *caller_ci = ((struct rb_forwarding_call_data *)calling->cd)->caller_ci;
|
|
|
|
VM_ASSERT((vm_ci_argc(caller_ci), 1));
|
|
|
|
new_fcd.caller_ci = caller_ci;
|
|
|
|
calling->cd = (struct rb_call_data *)&new_fcd;
|
|
|
|
}
|
2020-12-14 23:40:38 +03:00
|
|
|
calling->cc = &VM_CC_ON_STACK(klass,
|
|
|
|
vm_call_general,
|
|
|
|
{ .method_missing_reason = missing_reason },
|
|
|
|
rb_callable_method_entry_with_refinements(klass, mid, NULL));
|
2020-06-04 07:03:13 +03:00
|
|
|
|
2022-08-10 23:02:19 +03:00
|
|
|
if (flags & VM_CALL_FCALL) {
|
|
|
|
return vm_call_method(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct rb_callcache *cc = calling->cc;
|
|
|
|
VM_ASSERT(callable_method_entry_p(vm_cc_cme(cc)));
|
|
|
|
|
|
|
|
if (vm_cc_cme(cc) != NULL) {
|
|
|
|
switch (METHOD_ENTRY_VISI(vm_cc_cme(cc))) {
|
|
|
|
case METHOD_VISI_PUBLIC: /* likely */
|
|
|
|
return vm_call_method_each_type(ec, reg_cfp, calling);
|
|
|
|
case METHOD_VISI_PRIVATE:
|
|
|
|
vm_cc_method_missing_reason_set(cc, MISSING_PRIVATE);
|
2022-08-25 10:28:07 +03:00
|
|
|
break;
|
2022-08-10 23:02:19 +03:00
|
|
|
case METHOD_VISI_PROTECTED:
|
|
|
|
vm_cc_method_missing_reason_set(cc, MISSING_PROTECTED);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
VM_UNREACHABLE(vm_call_method);
|
|
|
|
}
|
|
|
|
return vm_call_method_missing(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vm_call_method_nome(ec, reg_cfp, calling);
|
2020-05-28 06:33:53 +03:00
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
vm_call_opt_send0(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int flags)
|
2012-09-28 08:05:36 +04:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
int i;
|
|
|
|
VALUE sym;
|
|
|
|
|
|
|
|
i = calling->argc - 1;
|
|
|
|
|
|
|
|
if (calling->argc == 0) {
|
|
|
|
rb_raise(rb_eArgError, "no method name given");
|
|
|
|
}
|
|
|
|
|
|
|
|
sym = TOPN(i);
|
|
|
|
/* E.g. when i == 2
|
|
|
|
*
|
|
|
|
* | | | | TOPN
|
|
|
|
* +------+ | |
|
|
|
|
* | arg1 | ---+ | | 0
|
|
|
|
* +------+ | +------+
|
|
|
|
* | arg0 | -+ +-> | arg1 | 1
|
|
|
|
* +------+ | +------+
|
|
|
|
* | sym | +---> | arg0 | 2
|
|
|
|
* +------+ +------+
|
|
|
|
* | recv | | recv | 3
|
|
|
|
* --+------+--------+------+------
|
|
|
|
*/
|
|
|
|
/* shift arguments */
|
|
|
|
if (i > 0) {
|
|
|
|
MEMMOVE(&TOPN(i), &TOPN(i-1), VALUE, i);
|
|
|
|
}
|
|
|
|
calling->argc -= 1;
|
|
|
|
DEC_SP(1);
|
|
|
|
|
|
|
|
return vm_call_symbol(ec, reg_cfp, calling, ci, sym, flags);
|
|
|
|
}
|
2019-03-22 10:57:26 +03:00
|
|
|
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_opt_send_complex(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_send_complex);
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
int flags = VM_CALL_FCALL;
|
|
|
|
VALUE sym;
|
2012-09-28 08:05:36 +04:00
|
|
|
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
VALUE argv_ary;
|
|
|
|
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(argv_ary = calling->heap_argv)) {
|
|
|
|
sym = rb_ary_shift(argv_ary);
|
|
|
|
flags |= VM_CALL_ARGS_SPLAT;
|
|
|
|
if (calling->kw_splat) {
|
|
|
|
VALUE last_hash = rb_ary_last(0, NULL, argv_ary);
|
|
|
|
((struct RHash *)last_hash)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
|
|
|
|
calling->kw_splat = 0;
|
|
|
|
}
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
return vm_call_symbol(ec, reg_cfp, calling, ci, sym, flags);
|
2012-09-28 08:05:36 +04:00
|
|
|
}
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
if (calling->kw_splat) flags |= VM_CALL_KW_SPLAT;
|
|
|
|
return vm_call_opt_send0(ec, reg_cfp, calling, flags);
|
|
|
|
}
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_opt_send_simple(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_send_simple);
|
2023-07-31 10:04:16 +03:00
|
|
|
return vm_call_opt_send0(ec, reg_cfp, calling, vm_ci_flag(calling->cd->ci) | VM_CALL_FCALL);
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_opt_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_send);
|
|
|
|
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
int flags = vm_ci_flag(ci);
|
|
|
|
|
2024-07-02 20:54:23 +03:00
|
|
|
if (UNLIKELY((flags & VM_CALL_FORWARDING) || (!(flags & VM_CALL_ARGS_SIMPLE) &&
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
((calling->argc == 1 && (flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT))) ||
|
|
|
|
(calling->argc == 2 && (flags & VM_CALL_ARGS_SPLAT) && (flags & VM_CALL_KW_SPLAT)) ||
|
2024-07-02 20:54:23 +03:00
|
|
|
((flags & VM_CALL_KWARG) && (vm_ci_kwarg(ci)->keyword_len == calling->argc)))))) {
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_opt_send_complex, TRUE);
|
|
|
|
return vm_call_opt_send_complex(ec, reg_cfp, calling);
|
2020-05-28 06:33:53 +03:00
|
|
|
}
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
|
Optimize send calls
Similar to the bmethod optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the method argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* send(meth, arg) ~5%
* send(meth, *args) ~75% for args.length == 200
* send(meth, *args, **kw) ~50% for args.length == 200
* send(meth, **kw) ~25%
* send(meth, kw: 1) ~115%
Note that empty argument splats do get slower with this approach,
by about 20%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* send(*args)
* send(*args, **kw)
This is because the you cannot shift the method argument off
without first splatting the arg.
2023-04-02 06:55:43 +03:00
|
|
|
CC_SET_FASTPATH(calling->cc, vm_call_opt_send_simple, TRUE);
|
|
|
|
return vm_call_opt_send_simple(ec, reg_cfp, calling);
|
2012-09-28 08:05:36 +04:00
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
static VALUE
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_call_method_missing_body(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling,
|
|
|
|
const struct rb_callinfo *orig_ci, enum method_missing_reason reason)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_method_missing);
|
|
|
|
|
2015-09-19 20:59:58 +03:00
|
|
|
VALUE *argv = STACK_ADDR_FROM_TOP(calling->argc);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
unsigned int argc, flag;
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2023-04-02 21:28:01 +03:00
|
|
|
flag = VM_CALL_FCALL | VM_CALL_OPT_SEND | vm_ci_flag(orig_ci);
|
|
|
|
argc = ++calling->argc;
|
2015-09-19 20:59:58 +03:00
|
|
|
|
2023-04-02 21:28:01 +03:00
|
|
|
/* shift arguments: m(a, b, c) #=> method_missing(:m, a, b, c) */
|
|
|
|
CHECK_VM_STACK_OVERFLOW(reg_cfp, 1);
|
|
|
|
vm_check_canary(ec, reg_cfp->sp);
|
|
|
|
if (argc > 1) {
|
|
|
|
MEMMOVE(argv+1, argv, VALUE, argc-1);
|
2012-10-17 11:12:40 +04:00
|
|
|
}
|
2023-04-02 21:28:01 +03:00
|
|
|
argv[0] = ID2SYM(vm_ci_mid(orig_ci));
|
|
|
|
INC_SP(1);
|
2012-10-17 11:12:40 +04:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
ec->method_missing_reason = reason;
|
2024-06-20 17:56:03 +03:00
|
|
|
|
|
|
|
struct rb_forwarding_call_data new_fcd = {
|
|
|
|
.cd = {
|
|
|
|
.ci = &VM_CI_ON_STACK(idMethodMissing, flag, argc, vm_ci_kwarg(orig_ci)),
|
|
|
|
.cc = NULL,
|
|
|
|
},
|
|
|
|
.caller_ci = NULL,
|
2023-07-31 10:04:16 +03:00
|
|
|
};
|
2024-06-20 17:56:03 +03:00
|
|
|
|
|
|
|
if (!(flag & VM_CALL_FORWARDING)) {
|
|
|
|
calling->cd = &new_fcd.cd;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
const struct rb_callinfo *caller_ci = ((struct rb_forwarding_call_data *)calling->cd)->caller_ci;
|
|
|
|
VM_ASSERT((vm_ci_argc(caller_ci), 1));
|
|
|
|
new_fcd.caller_ci = caller_ci;
|
|
|
|
calling->cd = (struct rb_call_data *)&new_fcd;
|
|
|
|
}
|
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
calling->cc = &VM_CC_ON_STACK(Qundef, vm_call_general, {{ 0 }},
|
2020-12-14 23:40:38 +03:00
|
|
|
rb_callable_method_entry_without_refinements(CLASS_OF(calling->recv), idMethodMissing, NULL));
|
|
|
|
return vm_call_method(ec, reg_cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_method_missing(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
return vm_call_method_missing_body(ec, reg_cfp, calling, calling->cd->ci, vm_cc_cmethod_missing_reason(calling->cc));
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
|
2016-12-08 08:16:33 +03:00
|
|
|
static const rb_callable_method_entry_t *refined_method_callable_without_refinement(const rb_callable_method_entry_t *me);
|
2015-10-05 22:44:05 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_zsuper(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling, VALUE klass)
|
2015-10-05 22:44:05 +03:00
|
|
|
{
|
|
|
|
klass = RCLASS_SUPER(klass);
|
|
|
|
|
2023-07-31 10:04:16 +03:00
|
|
|
const rb_callable_method_entry_t *cme = klass ? rb_callable_method_entry(klass, vm_ci_mid(calling->cd->ci)) : NULL;
|
2020-01-08 10:14:01 +03:00
|
|
|
if (cme == NULL) {
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_nome(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
if (cme->def->type == VM_METHOD_TYPE_REFINED &&
|
|
|
|
cme->def->body.refined.orig_me) {
|
|
|
|
cme = refined_method_callable_without_refinement(cme);
|
2016-12-08 08:16:33 +03:00
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
|
2022-10-03 18:14:32 +03:00
|
|
|
calling->cc = &VM_CC_ON_STACK(Qundef, vm_call_general, {{ 0 }}, cme);
|
2020-06-04 07:03:13 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
2012-12-07 19:49:21 +04:00
|
|
|
static inline VALUE
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
find_refinement(VALUE refinements, VALUE klass)
|
|
|
|
{
|
|
|
|
if (NIL_P(refinements)) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
2012-12-07 19:49:21 +04:00
|
|
|
return rb_hash_lookup(refinements, klass);
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
}
|
|
|
|
|
2017-10-27 05:49:30 +03:00
|
|
|
PUREFUNC(static rb_control_frame_t * current_method_entry(const rb_execution_context_t *ec, rb_control_frame_t *cfp));
|
2013-02-24 08:36:00 +04:00
|
|
|
static rb_control_frame_t *
|
2017-10-27 05:49:30 +03:00
|
|
|
current_method_entry(const rb_execution_context_t *ec, rb_control_frame_t *cfp)
|
2013-02-24 08:36:00 +04:00
|
|
|
{
|
|
|
|
rb_control_frame_t *top_cfp = cfp;
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
if (cfp->iseq && ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_BLOCK) {
|
|
|
|
const rb_iseq_t *local_iseq = ISEQ_BODY(cfp->iseq)->local_iseq;
|
2015-07-22 01:52:59 +03:00
|
|
|
|
2013-02-24 08:36:00 +04:00
|
|
|
do {
|
|
|
|
cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
2017-10-27 05:49:30 +03:00
|
|
|
if (RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(ec, cfp)) {
|
2013-02-24 08:36:00 +04:00
|
|
|
/* TODO: orphan block */
|
|
|
|
return top_cfp;
|
|
|
|
}
|
|
|
|
} while (cfp->iseq != local_iseq);
|
|
|
|
}
|
|
|
|
return cfp;
|
|
|
|
}
|
|
|
|
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, each of which includes M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searched and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not take
defined_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
static const rb_callable_method_entry_t *
|
|
|
|
refined_method_callable_without_refinement(const rb_callable_method_entry_t *me)
|
|
|
|
{
|
|
|
|
const rb_method_entry_t *orig_me = me->def->body.refined.orig_me;
|
|
|
|
const rb_callable_method_entry_t *cme;
|
|
|
|
|
|
|
|
if (orig_me->defined_class == 0) {
|
|
|
|
cme = NULL;
|
|
|
|
rb_notimplement();
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cme = (const rb_callable_method_entry_t *)orig_me;
|
|
|
|
}
|
|
|
|
|
|
|
|
VM_ASSERT(callable_method_entry_p(cme));
|
2016-12-08 08:16:33 +03:00
|
|
|
|
|
|
|
if (UNDEFINED_METHOD_ENTRY_P(cme)) {
|
|
|
|
cme = NULL;
|
|
|
|
}
|
|
|
|
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
return cme;
|
|
|
|
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
static const rb_callable_method_entry_t *
|
2020-12-14 23:40:38 +03:00
|
|
|
search_refined_method(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2019-08-17 06:32:29 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
ID mid = vm_ci_mid(calling->cd->ci);
|
2019-08-17 06:32:29 +03:00
|
|
|
const rb_cref_t *cref = vm_get_cref(cfp->ep);
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache * const cc = calling->cc;
|
2020-01-08 10:14:01 +03:00
|
|
|
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
|
2019-08-17 06:32:29 +03:00
|
|
|
|
|
|
|
for (; cref; cref = CREF_NEXT(cref)) {
|
2020-01-08 10:14:01 +03:00
|
|
|
const VALUE refinement = find_refinement(CREF_REFINEMENTS(cref), vm_cc_cme(cc)->owner);
|
2019-08-17 06:32:29 +03:00
|
|
|
if (NIL_P(refinement)) continue;
|
|
|
|
|
|
|
|
const rb_callable_method_entry_t *const ref_me =
|
|
|
|
rb_callable_method_entry(refinement, mid);
|
|
|
|
|
|
|
|
if (ref_me) {
|
2020-01-08 10:14:01 +03:00
|
|
|
if (vm_cc_call(cc) == vm_call_super_method) {
|
2019-08-17 06:32:29 +03:00
|
|
|
const rb_control_frame_t *top_cfp = current_method_entry(ec, cfp);
|
|
|
|
const rb_callable_method_entry_t *top_me = rb_vm_frame_method_entry(top_cfp);
|
|
|
|
if (top_me && rb_method_definition_eq(ref_me->def, top_me->def)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
if (cme->def->type != VM_METHOD_TYPE_REFINED ||
|
|
|
|
cme->def != ref_me->def) {
|
|
|
|
cme = ref_me;
|
2019-08-17 06:32:29 +03:00
|
|
|
}
|
|
|
|
if (ref_me->def->type != VM_METHOD_TYPE_REFINED) {
|
2020-01-08 10:14:01 +03:00
|
|
|
return cme;
|
2019-08-17 06:32:29 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2020-01-08 10:14:01 +03:00
|
|
|
return NULL;
|
2019-08-17 06:32:29 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
if (vm_cc_cme(cc)->def->body.refined.orig_me) {
|
|
|
|
return refined_method_callable_without_refinement(vm_cc_cme(cc));
|
2019-08-17 06:32:29 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-01-08 10:14:01 +03:00
|
|
|
VALUE klass = RCLASS_SUPER(vm_cc_cme(cc)->defined_class);
|
|
|
|
const rb_callable_method_entry_t *cme = klass ? rb_callable_method_entry(klass, mid) : NULL;
|
|
|
|
return cme;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_refined(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2020-01-08 10:14:01 +03:00
|
|
|
{
|
2023-07-31 10:17:55 +03:00
|
|
|
const rb_callable_method_entry_t *ref_cme = search_refined_method(ec, cfp, calling);
|
2020-01-08 10:14:01 +03:00
|
|
|
|
2023-07-31 10:17:55 +03:00
|
|
|
if (ref_cme) {
|
|
|
|
if (calling->cd->cc) {
|
|
|
|
const struct rb_callcache *cc = calling->cc = vm_cc_new(vm_cc_cme(calling->cc)->defined_class, ref_cme, vm_call_general, cc_type_refinement);
|
|
|
|
RB_OBJ_WRITE(cfp->iseq, &calling->cd->cc, cc);
|
|
|
|
return vm_call_method(ec, cfp, calling);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
struct rb_callcache *ref_cc = &VM_CC_ON_STACK(Qundef, vm_call_general, {{ 0 }}, ref_cme);
|
|
|
|
calling->cc= ref_cc;
|
|
|
|
return vm_call_method(ec, cfp, calling);
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_nome(ec, cfp, calling);
|
2019-08-17 06:32:29 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-18 05:01:31 +03:00
|
|
|
static inline VALUE vm_invoke_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_callinfo *ci, bool is_lambda, VALUE block_handler);
|
|
|
|
|
|
|
|
NOINLINE(static VALUE
|
|
|
|
vm_invoke_block_opt_call(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE block_handler));
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_invoke_block_opt_call(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci, VALUE block_handler)
|
|
|
|
{
|
|
|
|
int argc = calling->argc;
|
|
|
|
|
|
|
|
/* remove self */
|
|
|
|
if (argc > 0) MEMMOVE(&TOPN(argc), &TOPN(argc-1), VALUE, argc);
|
|
|
|
DEC_SP(1);
|
|
|
|
|
|
|
|
return vm_invoke_block(ec, reg_cfp, calling, ci, false, block_handler);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_opt_call(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_call);
|
|
|
|
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2021-11-18 05:01:31 +03:00
|
|
|
VALUE procval = calling->recv;
|
|
|
|
return vm_invoke_block_opt_call(ec, reg_cfp, calling, ci, VM_BH_FROM_PROC(procval));
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_opt_block_call(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_block_call);
|
|
|
|
|
|
|
|
VALUE block_handler = VM_ENV_BLOCK_HANDLER(VM_CF_LEP(reg_cfp));
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2021-11-18 05:01:31 +03:00
|
|
|
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_CALL, PROC_REDEFINED_OP_FLAG)) {
|
|
|
|
return vm_invoke_block_opt_call(ec, reg_cfp, calling, ci, block_handler);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
calling->recv = rb_vm_bh_to_procval(ec, block_handler);
|
|
|
|
calling->cc = rb_vm_search_method_slowpath(ci, CLASS_OF(calling->recv));
|
|
|
|
return vm_call_general(ec, reg_cfp, calling);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2021-11-20 18:31:51 +03:00
|
|
|
vm_call_opt_struct_aref0(rb_execution_context_t *ec, struct rb_calling_info *calling)
|
2021-11-18 05:01:31 +03:00
|
|
|
{
|
|
|
|
VALUE recv = calling->recv;
|
|
|
|
|
|
|
|
VM_ASSERT(RB_TYPE_P(recv, T_STRUCT));
|
|
|
|
VM_ASSERT(vm_cc_cme(calling->cc)->def->type == VM_METHOD_TYPE_OPTIMIZED);
|
|
|
|
VM_ASSERT(vm_cc_cme(calling->cc)->def->body.optimized.type == OPTIMIZED_METHOD_TYPE_STRUCT_AREF);
|
|
|
|
|
|
|
|
const unsigned int off = vm_cc_cme(calling->cc)->def->body.optimized.index;
|
|
|
|
return internal_RSTRUCT_GET(recv, off);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_opt_struct_aref(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_struct_aref);
|
|
|
|
|
2021-11-20 18:31:51 +03:00
|
|
|
VALUE ret = vm_call_opt_struct_aref0(ec, calling);
|
2021-11-18 05:01:31 +03:00
|
|
|
reg_cfp->sp -= 1;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2021-11-20 18:31:51 +03:00
|
|
|
vm_call_opt_struct_aset0(rb_execution_context_t *ec, struct rb_calling_info *calling, VALUE val)
|
2021-11-18 05:01:31 +03:00
|
|
|
{
|
|
|
|
VALUE recv = calling->recv;
|
|
|
|
|
|
|
|
VM_ASSERT(RB_TYPE_P(recv, T_STRUCT));
|
|
|
|
VM_ASSERT(vm_cc_cme(calling->cc)->def->type == VM_METHOD_TYPE_OPTIMIZED);
|
|
|
|
VM_ASSERT(vm_cc_cme(calling->cc)->def->body.optimized.type == OPTIMIZED_METHOD_TYPE_STRUCT_ASET);
|
|
|
|
|
|
|
|
rb_check_frozen(recv);
|
|
|
|
|
|
|
|
const unsigned int off = vm_cc_cme(calling->cc)->def->body.optimized.index;
|
|
|
|
internal_RSTRUCT_SET(recv, off, val);
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_call_opt_struct_aset(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
|
|
|
{
|
|
|
|
RB_DEBUG_COUNTER_INC(ccf_opt_struct_aset);
|
|
|
|
|
2021-11-20 18:31:51 +03:00
|
|
|
VALUE ret = vm_call_opt_struct_aset0(ec, calling, *(reg_cfp->sp - 1));
|
2021-11-18 05:01:31 +03:00
|
|
|
reg_cfp->sp -= 2;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
NOINLINE(static VALUE vm_call_optimized(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling,
|
|
|
|
const struct rb_callinfo *ci, const struct rb_callcache *cc));
|
|
|
|
|
2023-10-27 03:03:17 +03:00
|
|
|
/*
 * Evaluate `func` into `var`, firing C_CALL/C_RETURN event hooks around it
 * when either event is enabled in ruby_vm_event_flags; when neither is
 * enabled, run `nohook` first and then evaluate `func`.
 *
 * Relies on `ec`, `calling`, `ci` and `cc` being in scope at the expansion
 * site.  Expands to a bare if/else (not a do-while), so it can be used with
 * or without a trailing semicolon.
 */
#define VM_CALL_METHOD_ATTR(var, func, nohook) \
    if (UNLIKELY(ruby_vm_event_flags & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN))) { \
        EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_CALL, calling->recv, vm_cc_cme(cc)->def->original_id, \
                        vm_ci_mid(ci), vm_cc_cme(cc)->owner, Qundef); \
        var = func; \
        EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, calling->recv, vm_cc_cme(cc)->def->original_id, \
                        vm_ci_mid(ci), vm_cc_cme(cc)->owner, (var)); \
    } \
    else { \
        nohook; \
        var = func; \
    }
|
|
|
|
|
2021-11-18 05:01:31 +03:00
|
|
|
static VALUE
|
|
|
|
vm_call_optimized(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling,
|
|
|
|
const struct rb_callinfo *ci, const struct rb_callcache *cc)
|
|
|
|
{
|
|
|
|
switch (vm_cc_cme(cc)->def->body.optimized.type) {
|
|
|
|
case OPTIMIZED_METHOD_TYPE_SEND:
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_opt_send, TRUE);
|
|
|
|
return vm_call_opt_send(ec, cfp, calling);
|
|
|
|
case OPTIMIZED_METHOD_TYPE_CALL:
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_opt_call, TRUE);
|
|
|
|
return vm_call_opt_call(ec, cfp, calling);
|
|
|
|
case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_opt_block_call, TRUE);
|
|
|
|
return vm_call_opt_block_call(ec, cfp, calling);
|
2023-10-27 03:03:17 +03:00
|
|
|
case OPTIMIZED_METHOD_TYPE_STRUCT_AREF: {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, 0);
|
2021-11-18 05:01:31 +03:00
|
|
|
rb_check_arity(calling->argc, 0, 0);
|
|
|
|
|
2023-10-27 03:03:17 +03:00
|
|
|
VALUE v;
|
|
|
|
VM_CALL_METHOD_ATTR(v,
|
|
|
|
vm_call_opt_struct_aref(ec, cfp, calling),
|
|
|
|
set_vm_cc_ivar(cc); \
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_opt_struct_aref, (vm_ci_flag(ci) & VM_CALL_ARGS_SIMPLE)))
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
case OPTIMIZED_METHOD_TYPE_STRUCT_ASET: {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, 1);
|
2021-11-18 05:01:31 +03:00
|
|
|
rb_check_arity(calling->argc, 1, 1);
|
2023-10-27 03:03:17 +03:00
|
|
|
|
|
|
|
VALUE v;
|
|
|
|
VM_CALL_METHOD_ATTR(v,
|
|
|
|
vm_call_opt_struct_aset(ec, cfp, calling),
|
|
|
|
set_vm_cc_ivar(cc); \
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_opt_struct_aset, (vm_ci_flag(ci) & VM_CALL_ARGS_SIMPLE)))
|
|
|
|
return v;
|
|
|
|
}
|
2021-11-18 05:01:31 +03:00
|
|
|
default:
|
|
|
|
rb_bug("vm_call_method: unsupported optimized method type (%d)", vm_cc_cme(cc)->def->body.optimized.type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple method invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable makes it possible to customize the method (block)
invocation process for each call site. However, it always calls
`vm_call_general()' as of this change.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
`Primitive.mandatory_only?` for fast path
Compared with C methods, a built-in method written in Ruby is
slower if only mandatory parameters are given because it needs to
check the arguments and fill default values for optional and keyword
parameters (C methods can check the number of parameters with `argc`,
so there are no overhead). Passing mandatory arguments are common
(optional arguments are exceptional, in many cases) so it is important
to provide the fast path for such common cases.
`Primitive.mandatory_only?` is a special builtin function used with
`if` expression like that:
```ruby
def self.at(time, subsec = false, unit = :microsecond, in: nil)
if Primitive.mandatory_only?
Primitive.time_s_at1(time)
else
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
end
```
and it makes two ISeq,
```
def self.at(time, subsec = false, unit = :microsecond, in: nil)
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
def self.at(time)
Primitive.time_s_at1(time)
end
```
and (2) is pointed by (1). Note that `Primitive.mandatory_only?`
should be used only in a condition of an `if` statement and the
`if` statement should be equal to the method body (you can not
put any expression before and after the `if` statement).
A method entry with `mandatory_only?` (`Time.at` on the above case)
is marked as `iseq_overload`. When the method will be dispatch only
with mandatory arguments (`Time.at(0)` for example), make another
method entry with ISeq (2) as mandatory only method entry and it
will be cached in an inline method cache.
The idea is similar discussed in https://bugs.ruby-lang.org/issues/16254
but it only checks mandatory parameters or more, because many cases
only mandatory parameters are given. If we find other cases (optional
or keyword parameters are used frequently and it hurts performance),
we can extend the feature.
2021-11-12 20:12:20 +03:00
|
|
|
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
|
2021-09-18 10:15:24 +03:00
|
|
|
VALUE v;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
2023-11-28 03:19:41 +03:00
|
|
|
VM_ASSERT(! METHOD_ENTRY_INVALIDATED(cme));
|
|
|
|
|
`Primitive.mandatory_only?` for fast path
Compared with C methods, a built-in method written in Ruby is
slower if only mandatory parameters are given because it needs to
check the arguments and fill default values for optional and keyword
parameters (C methods can check the number of parameters with `argc`,
so there are no overhead). Passing mandatory arguments are common
(optional arguments are exceptional, in many cases) so it is important
to provide the fast path for such common cases.
`Primitive.mandatory_only?` is a special builtin function used with
`if` expression like that:
```ruby
def self.at(time, subsec = false, unit = :microsecond, in: nil)
if Primitive.mandatory_only?
Primitive.time_s_at1(time)
else
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
end
```
and it makes two ISeq,
```
def self.at(time, subsec = false, unit = :microsecond, in: nil)
Primitive.time_s_at(time, subsec, unit, Primitive.arg!(:in))
end
def self.at(time)
Primitive.time_s_at1(time)
end
```
and (2) is pointed by (1). Note that `Primitive.mandatory_only?`
should be used only in a condition of an `if` statement and the
`if` statement should be equal to the method body (you can not
put any expression before and after the `if` statement).
A method entry with `mandatory_only?` (`Time.at` on the above case)
is marked as `iseq_overload`. When the method will be dispatch only
with mandatory arguments (`Time.at(0)` for example), make another
method entry with ISeq (2) as mandatory only method entry and it
will be cached in an inline method cache.
The idea is similar discussed in https://bugs.ruby-lang.org/issues/16254
but it only checks mandatory parameters or more, because many cases
only mandatory parameters are given. If we find other cases (optional
or keyword parameters are used frequently and it hurts performance),
we can extend the feature.
2021-11-12 20:12:20 +03:00
|
|
|
switch (cme->def->type) {
|
2015-10-05 22:44:05 +03:00
|
|
|
case VM_METHOD_TYPE_ISEQ:
|
2024-05-25 00:33:03 +03:00
|
|
|
if (ISEQ_BODY(def_iseq_ptr(cme->def))->param.flags.forwardable) {
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_fwd_setup, TRUE);
|
|
|
|
return vm_call_iseq_fwd_setup(ec, cfp, calling);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_iseq_setup, TRUE);
|
|
|
|
return vm_call_iseq_setup(ec, cfp, calling);
|
|
|
|
}
|
2015-09-19 20:59:58 +03:00
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
case VM_METHOD_TYPE_NOTIMPLEMENTED:
|
|
|
|
case VM_METHOD_TYPE_CFUNC:
|
2018-09-18 15:48:31 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_cfunc, TRUE);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_cfunc(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_ATTRSET:
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, 1);
|
2019-09-05 22:25:14 +03:00
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
rb_check_arity(calling->argc, 1, 1);
|
2022-10-03 18:14:32 +03:00
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
const unsigned int aset_mask = (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT | VM_CALL_KWARG | VM_CALL_FORWARDING);
|
2022-10-03 18:14:32 +03:00
|
|
|
|
|
|
|
if (vm_cc_markable(cc)) {
|
|
|
|
vm_cc_attr_index_initialize(cc, INVALID_SHAPE_ID);
|
|
|
|
VM_CALL_METHOD_ATTR(v,
|
|
|
|
vm_call_attrset_direct(ec, cfp, cc, calling->recv),
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_attrset, !(vm_ci_flag(ci) & aset_mask)));
|
2022-10-12 12:27:23 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-10-03 18:14:32 +03:00
|
|
|
cc = &((struct rb_callcache) {
|
|
|
|
.flags = T_IMEMO |
|
|
|
|
(imemo_callcache << FL_USHIFT) |
|
|
|
|
VM_CALLCACHE_UNMARKABLE |
|
|
|
|
VM_CALLCACHE_ON_STACK,
|
|
|
|
.klass = cc->klass,
|
|
|
|
.cme_ = cc->cme_,
|
|
|
|
.call_ = cc->call_,
|
|
|
|
.aux_ = {
|
|
|
|
.attr = {
|
2022-10-03 20:52:40 +03:00
|
|
|
.value = INVALID_SHAPE_ID << SHAPE_FLAG_SHIFT,
|
2022-10-03 18:14:32 +03:00
|
|
|
}
|
|
|
|
},
|
|
|
|
});
|
|
|
|
|
|
|
|
VM_CALL_METHOD_ATTR(v,
|
|
|
|
vm_call_attrset_direct(ec, cfp, cc, calling->recv),
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_attrset, !(vm_ci_flag(ci) & aset_mask)));
|
|
|
|
}
|
2021-09-18 10:15:24 +03:00
|
|
|
return v;
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_IVAR:
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, 0);
|
2015-10-05 22:44:05 +03:00
|
|
|
rb_check_arity(calling->argc, 0, 0);
|
2022-10-12 13:38:29 +03:00
|
|
|
vm_cc_attr_index_initialize(cc, INVALID_SHAPE_ID);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
const unsigned int ivar_mask = (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT | VM_CALL_FORWARDING);
|
2021-09-18 10:15:24 +03:00
|
|
|
VM_CALL_METHOD_ATTR(v,
|
|
|
|
vm_call_ivar(ec, cfp, calling),
|
|
|
|
CC_SET_FASTPATH(cc, vm_call_ivar, !(vm_ci_flag(ci) & ivar_mask)));
|
|
|
|
return v;
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_MISSING:
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_cc_method_missing_reason_set(cc, 0);
|
2018-09-18 15:48:31 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_method_missing, TRUE);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_missing(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_BMETHOD:
|
2018-09-18 15:48:31 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_bmethod, TRUE);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_bmethod(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_ALIAS:
|
2020-01-08 10:14:01 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_alias, TRUE);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_alias(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
|
|
|
|
case VM_METHOD_TYPE_OPTIMIZED:
|
2021-11-18 05:01:31 +03:00
|
|
|
return vm_call_optimized(ec, cfp, calling, ci, cc);
|
2013-08-29 12:03:23 +04:00
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
case VM_METHOD_TYPE_UNDEF:
|
|
|
|
break;
|
2015-09-19 20:59:58 +03:00
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
case VM_METHOD_TYPE_ZSUPER:
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_zsuper(ec, cfp, calling, RCLASS_ORIGIN(vm_cc_cme(cc)->defined_class));
|
2015-10-05 22:44:05 +03:00
|
|
|
|
2019-08-17 06:32:29 +03:00
|
|
|
case VM_METHOD_TYPE_REFINED:
|
2020-01-08 10:14:01 +03:00
|
|
|
// CC_SET_FASTPATH(cc, vm_call_refined, TRUE);
|
2020-04-12 06:45:22 +03:00
|
|
|
// should not set FASTPATH since vm_call_refined assumes cc->call is vm_call_super_method on invokesuper.
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_refined(ec, cfp, calling);
|
2015-10-05 22:44:05 +03:00
|
|
|
}
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', `op_argc', `blockiseq' and flag.
This information is stored in rb_call_info_t at compile
time.
This technique simplifies parameter passing in related
function calls (~10% speedup for simple method invocation on
my machine).
`rb_call_info_t' also has a new function pointer variable `call'.
This `call' variable makes it possible to customize the method (block)
invocation process for each call site. However, it always calls
`vm_call_general()' as of this change.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
rb_bug("vm_call_method: unsupported method type (%d)", vm_cc_cme(cc)->def->type);
|
2015-10-05 22:44:05 +03:00
|
|
|
}
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
|
2017-11-16 10:38:41 +03:00
|
|
|
NORETURN(static void vm_raise_method_missing(rb_execution_context_t *ec, int argc, const VALUE *argv, VALUE obj, int call_status));
|
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_method_nome(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2015-10-10 12:36:10 +03:00
|
|
|
{
|
|
|
|
/* method missing */
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2015-10-10 12:36:10 +03:00
|
|
|
const int stat = ci_missing_reason(ci);
|
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (vm_ci_mid(ci) == idMethodMissing) {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (UNLIKELY(calling->heap_argv)) {
|
|
|
|
vm_raise_method_missing(ec, RARRAY_LENINT(calling->heap_argv), RARRAY_CONST_PTR(calling->heap_argv), calling->recv, stat);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_control_frame_t *reg_cfp = cfp;
|
|
|
|
VALUE *argv = STACK_ADDR_FROM_TOP(calling->argc);
|
|
|
|
vm_raise_method_missing(ec, calling->argc, argv, calling->recv, stat);
|
|
|
|
}
|
2015-10-10 12:36:10 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_missing_body(ec, cfp, calling, ci, stat);
|
2015-10-10 12:36:10 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-27 23:47:06 +03:00
|
|
|
/* Protected method calls and super invocations need to check that the receiver
|
|
|
|
* (self for super) inherits the module on which the method is defined.
|
|
|
|
* In the case of refinements, it should consider the original class not the
|
|
|
|
* refinement.
|
|
|
|
*/
|
|
|
|
static VALUE
|
|
|
|
vm_defined_class_for_protected_call(const rb_callable_method_entry_t *me)
|
|
|
|
{
|
|
|
|
VALUE defined_class = me->defined_class;
|
|
|
|
VALUE refined_class = RCLASS_REFINED_CLASS(defined_class);
|
|
|
|
return NIL_P(refined_class) ? defined_class : refined_class;
|
|
|
|
}
|
|
|
|
|
2015-10-10 12:36:10 +03:00
|
|
|
static inline VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_method(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
|
2015-10-05 22:44:05 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc = calling->cc;
|
2019-10-24 12:08:52 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
VM_ASSERT(callable_method_entry_p(vm_cc_cme(cc)));
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, there are classes C and D, and incldues M,
class C; include M; end
class D; include M; end
then, two T_ICLASS objects for C's super class and D's super class
will be created.
When C.new.foo is called, then M#foo is searcheed and
rb_callable_method_t data is used by VM to invoke M#foo.
rb_method_entry_t data is only one for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to consider abotu this field again because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not takes
defiend_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
if (vm_cc_cme(cc) != NULL) {
|
|
|
|
switch (METHOD_ENTRY_VISI(vm_cc_cme(cc))) {
|
2015-10-06 13:25:25 +03:00
|
|
|
case METHOD_VISI_PUBLIC: /* likely */
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
2015-10-06 13:25:25 +03:00
|
|
|
|
|
|
|
case METHOD_VISI_PRIVATE:
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
|
2015-06-03 13:42:18 +03:00
|
|
|
enum method_missing_reason stat = MISSING_PRIVATE;
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (vm_ci_flag(ci) & VM_CALL_VCALL) stat |= MISSING_VCALL;
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_cc_method_missing_reason_set(cc, stat);
|
2018-09-18 16:52:08 +03:00
|
|
|
CC_SET_FASTPATH(cc, vm_call_method_missing, TRUE);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_missing(ec, cfp, calling);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
2015-10-06 13:25:25 +03:00
|
|
|
|
|
|
|
case METHOD_VISI_PROTECTED:
|
2022-06-16 23:28:39 +03:00
|
|
|
if (!(vm_ci_flag(ci) & (VM_CALL_OPT_SEND | VM_CALL_FCALL))) {
|
2022-05-27 23:47:06 +03:00
|
|
|
VALUE defined_class = vm_defined_class_for_protected_call(vm_cc_cme(cc));
|
|
|
|
if (!rb_obj_is_kind_of(cfp->self, defined_class)) {
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_cc_method_missing_reason_set(cc, MISSING_PROTECTED);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_missing(ec, cfp, calling);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
|
|
|
else {
|
2015-10-10 12:36:10 +03:00
|
|
|
/* caching method info to dummy cc */
|
2020-01-08 10:14:01 +03:00
|
|
|
VM_ASSERT(vm_cc_cme(cc) != NULL);
|
2020-06-10 04:19:48 +03:00
|
|
|
struct rb_callcache cc_on_stack = *cc;
|
|
|
|
FL_SET_RAW((VALUE)&cc_on_stack, VM_CALLCACHE_UNMARKABLE);
|
2020-12-14 23:40:38 +03:00
|
|
|
calling->cc = &cc_on_stack;
|
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
|
|
|
}
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_each_type(ec, cfp, calling);
|
2015-10-06 13:25:25 +03:00
|
|
|
|
|
|
|
default:
|
|
|
|
rb_bug("unreachable");
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', `op_argc', `blockiseq' and flag.
This information is stored in rb_call_info_t at compile
time.
This technique simplifies parameter passing at related
function calls (~10% speedup for simple method invocation on
my machine).
`rb_call_info_t' also has a new function pointer variable `call'.
This `call' variable makes it possible to customize the method (block)
invocation process for each call site. However, it always calls
`vm_call_general()' as of this change.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up with the above changes.
* iseq.c: catch up with the above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up with the above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macro and function
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method_nome(ec, cfp, calling);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * General-purpose call handler.
 *
 * Increments the ccf_general debug counter and then forwards ec, reg_cfp
 * and calling unchanged to vm_call_method(), returning whatever that
 * call returns.
 */
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_general(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_general);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method(ec, reg_cfp, calling);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
2021-08-24 01:22:14 +03:00
|
|
|
/*
 * Resets the call handler stored in the call cache cc to vm_call_general.
 *
 * With VM_ASSERT enabled, cc must be an imemo_callcache object and must not
 * be the cache returned by vm_cc_empty().
 */
void
|
|
|
|
rb_vm_cc_general(const struct rb_callcache *cc)
|
|
|
|
{
|
|
|
|
VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache));
|
|
|
|
VM_ASSERT(cc != vm_cc_empty());
|
|
|
|
|
|
|
|
/* cc is const-qualified; the cast lets the call_ field be overwritten in place. */
*(vm_call_handler *)&cc->call_ = vm_call_general;
|
|
|
|
}
|
|
|
|
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
/*
 * Call handler that increments the ccf_super_method debug counter and then
 * forwards ec, reg_cfp and calling to vm_call_method(), returning its
 * result.
 *
 * Apart from the counter, the ec check and the assertion, the body matches
 * vm_call_general(); the comments inside explain why it is kept distinct.
 */
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_call_super_method(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
{
|
2019-03-22 10:57:26 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(ccf_super_method);
|
|
|
|
|
2021-11-16 11:57:49 +03:00
|
|
|
// The check below exists to make this body differ from `vm_call_general`, because some compilers (VC, as we found)
|
|
|
|
// can merge identical functions, after which both functions share the same address.
|
|
|
|
// The address of `vm_call_super_method` is used in `search_refined_method`, so it must stay distinct.
|
|
|
|
if (ec == NULL) rb_bug("unreachable");
|
|
|
|
|
2015-10-05 22:44:05 +03:00
|
|
|
/* this check is required to distinguish this function from other call handlers. */
|
2021-11-16 11:52:20 +03:00
|
|
|
VM_ASSERT(vm_cc_call(calling->cc) == vm_call_super_method);
|
2020-12-14 23:40:38 +03:00
|
|
|
return vm_call_method(ec, reg_cfp, calling);
|
* revised r37993 to avoid SEGV/ILL in tests. In r37993, a method
entry with VM_METHOD_TYPE_REFINED holds only the original method
definition, so ci->me is set to a method entry allocated in the
stack, and it causes SEGV/ILL. In this commit, a method entry
with VM_METHOD_TYPE_REFINED holds the whole original method entry.
Furthermore, rb_thread_mark() is changed to mark cfp->klass to
avoid GC for iclasses created by copy_refinement_iclass().
* vm_method.c (rb_method_entry_make): add a method entry with
VM_METHOD_TYPE_REFINED to the class refined by the refinement if
the target module is a refinement. When a method entry with
VM_METHOD_TYPE_UNDEF is invoked by vm_call_method(), a method with
the same name is searched in refinements. If such a method is
found, the method is invoked. Otherwise, the original method in
the refined class (rb_method_definition_t::body.orig_me) is
invoked. This change is made to simplify the normal method lookup
and to improve the performance of normal method calls.
* vm_method.c (EXPR1, search_method, rb_method_entry),
vm_eval.c (rb_call0, rb_search_method_entry): do not use
refinements for method lookup.
* vm_insnhelper.c (vm_call_method): search methods in refinements if
ci->me is VM_METHOD_TYPE_REFINED. If the method is called by
super (i.e., ci->call == vm_call_super_method), skip the same
method entry as the current method to avoid infinite call of the
same method.
* class.c (include_modules_at): add a refined method entry for each
method defined in a module included in a refinement.
* class.c (rb_prepend_module): set an empty table to
RCLASS_M_TBL(klass) to add refined method entries, because
refinements should have priority over prepended modules.
* proc.c (mnew): use rb_method_entry_with_refinements() to get
a refined method.
* vm.c (rb_thread_mark): mark cfp->klass for iclasses created by
copy_refinement_iclass().
* vm.c (Init_VM), cont.c (fiber_init): initialize th->cfp->klass.
* test/ruby/test_refinement.rb (test_inline_method_cache): do not skip
the test because it should pass successfully.
* test/ruby/test_refinement.rb (test_redefine_refined_method): new
test for the case a refined method is redefined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-06 17:08:41 +04:00
|
|
|
}
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
/* super */
|
|
|
|
|
|
|
|
/*
 * Computes the superclass to use for klass.
 *
 * If klass is a T_ICLASS whose RBASIC klass is a T_MODULE flagged with
 * RMODULE_IS_REFINEMENT, that module replaces klass first.  The return
 * value is RCLASS_SUPER() applied to RCLASS_ORIGIN() of the resulting
 * class.
 */
static inline VALUE
|
|
|
|
vm_search_normal_superclass(VALUE klass)
|
|
|
|
{
|
2012-12-08 07:36:58 +04:00
|
|
|
if (BUILTIN_TYPE(klass) == T_ICLASS &&
|
2022-05-05 22:10:36 +03:00
|
|
|
RB_TYPE_P(RBASIC(klass)->klass, T_MODULE) &&
|
2020-08-11 20:22:43 +03:00
|
|
|
FL_TEST_RAW(RBASIC(klass)->klass, RMODULE_IS_REFINEMENT)) {
|
* fix the behavior when a module is included into a refinement.
This change is a little tricky, so it might be better to prohibit
module inclusion to refinements.
* include/ruby/ruby.h (RMODULE_INCLUDED_INTO_REFINEMENT): new flag
to represent that a module (iclass) is included into a refinement.
* class.c (include_modules_at): set RMODULE_INCLUDED_INTO_REFINEMENT
if klass is a refinement.
* eval.c (rb_mod_refine): set the superclass of a refinement to the
refined class for super.
* eval.c (rb_using_refinement): skip the above superclass (the
refined class) when creating iclasses for refinements. Otherwise,
`using Refinement1; using Refinement2' creates iclasses:
<Refinement2> -> <RefinedClass> -> <Refinement1> -> RefinedClass,
where <Module> is an iclass for Module, so RefinedClass is
searched before Refinement1. The correct iclasses should be
<Refinement2> -> <Refinement1> -> RefinedClass.
* vm_insnhelper.c (vm_search_normal_superclass): if klass is an
iclass for a refinement, use the refinement's superclass instead
of the iclass's superclass. Otherwise, multiple refinements are
searched by super. For example, if a refinement Refinement2
includes a module M (i.e., Refinement2 -> <M> -> RefinedClass,
and if refinements iclasses are <Refinement2> -> <M>' ->
<Refinement1> -> RefinedClass, then super in <Refinement2> should
use Refinement2's superclass <M> instead of <Refinement2>'s
superclass <M>'.
* vm_insnhelper.c (vm_search_super_method): do not raise a
NotImplementError if current_defind_class is a module included
into a refinement. Because of the change of
vm_search_normal_superclass(), the receiver might not be an
instance of the module('s iclass).
* test/ruby/test_refinement.rb: related test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38298 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-10 20:05:45 +04:00
|
|
|
klass = RBASIC(klass)->klass;
|
2012-12-08 07:36:58 +04:00
|
|
|
}
|
* fix the behavior when a module is included into a refinement.
This change is a little tricky, so it might be better to prohibit
module inclusion to refinements.
* include/ruby/ruby.h (RMODULE_INCLUDED_INTO_REFINEMENT): new flag
to represent that a module (iclass) is included into a refinement.
* class.c (include_modules_at): set RMODULE_INCLUDED_INTO_REFINEMENT
if klass is a refinement.
* eval.c (rb_mod_refine): set the superclass of a refinement to the
refined class for super.
* eval.c (rb_using_refinement): skip the above superclass (the
refined class) when creating iclasses for refinements. Otherwise,
`using Refinement1; using Refinement2' creates iclasses:
<Refinement2> -> <RefinedClass> -> <Refinement1> -> RefinedClass,
where <Module> is an iclass for Module, so RefinedClass is
searched before Refinement1. The correct iclasses should be
<Refinement2> -> <Refinement1> -> RefinedClass.
* vm_insnhelper.c (vm_search_normal_superclass): if klass is an
iclass for a refinement, use the refinement's superclass instead
of the iclass's superclass. Otherwise, multiple refinements are
searched by super. For example, if a refinement Refinement2
includes a module M (i.e., Refinement2 -> <M> -> RefinedClass,
and if refinements iclasses are <Refinement2> -> <M>' ->
<Refinement1> -> RefinedClass, then super in <Refinement2> should
use Refinement2's superclass <M> instead of <Refinement2>'s
superclass <M>'.
* vm_insnhelper.c (vm_search_super_method): do not raise a
NotImplementError if current_defind_class is a module included
into a refinement. Because of the change of
vm_search_normal_superclass(), the receiver might not be an
instance of the module('s iclass).
* test/ruby/test_refinement.rb: related test.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@38298 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-12-10 20:05:45 +04:00
|
|
|
klass = RCLASS_ORIGIN(klass);
|
|
|
|
return RCLASS_SUPER(klass);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
2018-01-18 12:44:49 +03:00
|
|
|
/* Declared NORETURN: the definition below only raises and never returns. */
NORETURN(static void vm_super_outside(void));
|
|
|
|
|
2012-10-14 23:58:59 +04:00
|
|
|
/*
 * Raises NoMethodError with the message shown in the rb_raise() call.
 * Takes no arguments and does not return to the caller.
 */
static void
|
|
|
|
vm_super_outside(void)
|
|
|
|
{
|
|
|
|
rb_raise(rb_eNoMethodError, "super called outside of method");
|
|
|
|
}
|
|
|
|
|
2021-11-16 11:57:49 +03:00
|
|
|
/*
 * Returns a pointer to vm_empty_cc_for_super, a call cache object that is
 * defined outside this function.  The same address is returned on every call.
 */
static const struct rb_callcache *
|
|
|
|
empty_cc_for_super(void)
|
|
|
|
{
|
|
|
|
return &vm_empty_cc_for_super;
|
|
|
|
}
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
static const struct rb_callcache *
|
2019-10-24 12:08:52 +03:00
|
|
|
vm_search_super_method(const rb_control_frame_t *reg_cfp, struct rb_call_data *cd, VALUE recv)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2020-12-14 23:40:38 +03:00
|
|
|
VALUE current_defined_class;
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, if there are classes C and D, and both include M,
class C; include M; end
class D; include M; end
then two T_ICLASS objects, one for C's super class and one for D's super class,
will be created.
When C.new.foo is called, M#foo is searched for and
rb_callable_method_entry_t data is used by the VM to invoke M#foo.
There is only one rb_method_entry_t for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to reconsider this field because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not take
defined_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(reg_cfp);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
* method.h: introduce rb_callable_method_entry_t to remove
rb_control_frame_t::klass.
[Bug #11278], [Bug #11279]
rb_method_entry_t data belong to modules/classes.
rb_method_entry_t::owner points defined module or class.
module M
def foo; end
end
In this case, owner is M.
rb_callable_method_entry_t data belong to only classes.
For modules, MRI creates corresponding T_ICLASS internally.
rb_callable_method_entry_t can also belong to T_ICLASS.
rb_callable_method_entry_t::defined_class points T_CLASS or
T_ICLASS.
rb_method_entry_t data for classes (not for modules) are also
rb_callable_method_entry_t data because it is completely same data.
In this case, rb_method_entry_t::owner == rb_method_entry_t::defined_class.
For example, if there are classes C and D, and both include M,
class C; include M; end
class D; include M; end
then two T_ICLASS objects, one for C's super class and one for D's super class,
will be created.
When C.new.foo is called, M#foo is searched for and
rb_callable_method_entry_t data is used by the VM to invoke M#foo.
There is only one rb_method_entry_t for M#foo.
However, rb_callable_method_entry_t data are two (and can be more).
It is proportional to the number of including (and prepending)
classes (the number of T_ICLASS which point to the module).
Now, created rb_callable_method_entry_t are collected when
the original module M was modified. We can think it is a cache.
We need to select what kind of method entry data is needed.
To operate definition, then you need to use rb_method_entry_t.
You can access them by the following functions.
* rb_method_entry(VALUE klass, ID id);
* rb_method_entry_with_refinements(VALUE klass, ID id);
* rb_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method(VALUE refinements, const rb_method_entry_t *me);
To invoke methods, then you need to use rb_callable_method_entry_t
which you can get by the following APIs corresponding to the
above listed functions.
* rb_callable_method_entry(VALUE klass, ID id);
* rb_callable_method_entry_with_refinements(VALUE klass, ID id);
* rb_callable_method_entry_without_refinements(VALUE klass, ID id);
* rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_entry_t *me);
VM pushes rb_callable_method_entry_t, so that rb_vm_frame_method_entry()
returns rb_callable_method_entry_t.
You can check a super class of current method by
rb_callable_method_entry_t::defined_class.
* method.h: renamed from rb_method_entry_t::klass to
rb_method_entry_t::owner.
* internal.h: add rb_classext_struct::callable_m_tbl to cache
rb_callable_method_entry_t data.
We need to reconsider this field because it is only
active for T_ICLASS.
* class.c (method_entry_i): ditto.
* class.c (rb_define_attr): rb_method_entry() does not take
defined_class_ptr.
* gc.c (mark_method_entry): mark RCLASS_CALLABLE_M_TBL() for T_ICLASS.
* cont.c (fiber_init): rb_control_frame_t::klass is removed.
* proc.c: fix `struct METHOD' data structure because
rb_callable_method_t has all information.
* vm_core.h: remove several fields.
* rb_control_frame_t::klass.
* rb_block_t::klass.
And catch up changes.
* eval.c: catch up changes.
* gc.c: ditto.
* insns.def: ditto.
* vm.c: ditto.
* vm_args.c: ditto.
* vm_backtrace.c: ditto.
* vm_dump.c: ditto.
* vm_eval.c: ditto.
* vm_insnhelper.c: ditto.
* vm_method.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-07-03 14:24:50 +03:00
|
|
|
if (!me) {
|
2012-10-14 23:58:59 +04:00
|
|
|
vm_super_outside();
|
|
|
|
}
|
|
|
|
|
2022-05-27 23:47:06 +03:00
|
|
|
current_defined_class = vm_defined_class_for_protected_call(me);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2014-01-10 13:01:44 +04:00
|
|
|
if (BUILTIN_TYPE(current_defined_class) != T_MODULE &&
|
2020-08-11 20:22:43 +03:00
|
|
|
reg_cfp->iseq != method_entry_iseqptr(me) &&
|
2018-12-26 03:59:40 +03:00
|
|
|
!rb_obj_is_kind_of(recv, current_defined_class)) {
|
2013-01-10 11:51:35 +04:00
|
|
|
VALUE m = RB_TYPE_P(current_defined_class, T_ICLASS) ?
|
2019-08-21 23:59:36 +03:00
|
|
|
RCLASS_INCLUDER(current_defined_class) : current_defined_class;
|
2013-01-10 11:51:35 +04:00
|
|
|
|
2019-08-21 23:59:36 +03:00
|
|
|
if (m) { /* not bound UnboundMethod */
|
|
|
|
rb_raise(rb_eTypeError,
|
|
|
|
"self has wrong type to call super in this context: "
|
|
|
|
"%"PRIsVALUE" (expected %"PRIsVALUE")",
|
|
|
|
rb_obj_class(recv), m);
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
if (me->def->type == VM_METHOD_TYPE_BMETHOD && (vm_ci_flag(cd->ci) & VM_CALL_ZSUPER)) {
|
2013-05-05 13:57:02 +04:00
|
|
|
rb_raise(rb_eRuntimeError,
|
|
|
|
"implicit argument passing of super from method defined"
|
|
|
|
" by define_method() is not supported."
|
|
|
|
" Specify all arguments explicitly.");
|
|
|
|
}
|
2015-07-09 05:10:51 +03:00
|
|
|
|
2020-09-24 21:00:51 +03:00
|
|
|
ID mid = me->def->original_id;
|
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
if (!vm_ci_markable(cd->ci)) {
|
|
|
|
VM_FORCE_WRITE((const VALUE *)&cd->ci->mid, (VALUE)mid);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// update iseq. really? (TODO)
|
|
|
|
cd->ci = vm_ci_new_runtime(mid,
|
|
|
|
vm_ci_flag(cd->ci),
|
|
|
|
vm_ci_argc(cd->ci),
|
|
|
|
vm_ci_kwarg(cd->ci));
|
2020-09-24 21:00:51 +03:00
|
|
|
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
RB_OBJ_WRITTEN(reg_cfp->iseq, Qundef, cd->ci);
|
|
|
|
}
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
const struct rb_callcache *cc;
|
|
|
|
|
|
|
|
VALUE klass = vm_search_normal_superclass(me->defined_class);
|
2015-07-09 05:10:51 +03:00
|
|
|
|
|
|
|
if (!klass) {
|
2014-01-08 17:53:18 +04:00
|
|
|
/* bound instance method of module */
|
2023-07-31 06:26:27 +03:00
|
|
|
cc = vm_cc_new(klass, NULL, vm_call_method_missing, cc_type_super);
|
2020-01-08 10:14:01 +03:00
|
|
|
RB_OBJ_WRITE(reg_cfp->iseq, &cd->cc, cc);
|
2014-01-08 17:53:18 +04:00
|
|
|
}
|
2015-07-09 05:10:51 +03:00
|
|
|
else {
|
2020-12-14 23:40:38 +03:00
|
|
|
cc = vm_search_method_fastpath((VALUE)reg_cfp->iseq, cd, klass);
|
|
|
|
const rb_callable_method_entry_t *cached_cme = vm_cc_cme(cc);
|
2020-01-08 10:14:01 +03:00
|
|
|
|
|
|
|
// define_method can cache for different method id
|
|
|
|
if (cached_cme == NULL) {
|
2021-11-16 11:57:49 +03:00
|
|
|
// empty_cc_for_super is not markable object
|
|
|
|
cd->cc = empty_cc_for_super();
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
|
|
|
else if (cached_cme->called_id != mid) {
|
|
|
|
const rb_callable_method_entry_t *cme = rb_callable_method_entry(klass, mid);
|
2021-11-16 11:57:49 +03:00
|
|
|
if (cme) {
|
2023-07-31 06:26:27 +03:00
|
|
|
cc = vm_cc_new(klass, cme, vm_call_super_method, cc_type_super);
|
2021-11-16 11:57:49 +03:00
|
|
|
RB_OBJ_WRITE(reg_cfp->iseq, &cd->cc, cc);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cd->cc = cc = empty_cc_for_super();
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
2020-04-15 09:24:50 +03:00
|
|
|
else {
|
|
|
|
switch (cached_cme->def->type) {
|
|
|
|
// vm_call_refined (search_refined_method) assumes cc->call is vm_call_super_method on invokesuper
|
|
|
|
case VM_METHOD_TYPE_REFINED:
|
2020-04-15 09:49:28 +03:00
|
|
|
// cc->klass is superclass of receiver class. Checking cc->klass is not enough to invalidate IVC for the receiver class.
|
2020-04-15 09:24:50 +03:00
|
|
|
case VM_METHOD_TYPE_ATTRSET:
|
2020-04-15 09:49:28 +03:00
|
|
|
case VM_METHOD_TYPE_IVAR:
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_cc_call_set(cc, vm_call_super_method); // invalidate fastpath
|
2020-04-15 09:24:50 +03:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break; // use fastpath
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
}
|
2015-07-09 05:10:51 +03:00
|
|
|
}
|
2020-12-14 23:40:38 +03:00
|
|
|
|
2021-11-16 11:57:49 +03:00
|
|
|
VM_ASSERT((vm_cc_cme(cc), true));
|
|
|
|
|
2020-12-14 23:40:38 +03:00
|
|
|
return cc;
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* yield */
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
block_proc_is_lambda(const VALUE procval)
|
|
|
|
{
|
|
|
|
rb_proc_t *proc;
|
|
|
|
|
|
|
|
if (procval) {
|
|
|
|
GetProcPtr(procval, proc);
|
|
|
|
return proc->is_lambda;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
static VALUE
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_yield_with_cfunc(rb_execution_context_t *ec,
|
2016-07-28 14:02:30 +03:00
|
|
|
const struct rb_captured_block *captured,
|
2019-09-03 19:32:42 +03:00
|
|
|
VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler,
|
2018-11-09 04:13:20 +03:00
|
|
|
const rb_callable_method_entry_t *me)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2016-07-28 14:02:30 +03:00
|
|
|
int is_lambda = FALSE; /* TODO */
|
|
|
|
VALUE val, arg, blockarg;
|
2019-09-18 00:32:19 +03:00
|
|
|
int frame_flag;
|
2016-07-28 14:02:30 +03:00
|
|
|
const struct vm_ifunc *ifunc = captured->code.ifunc;
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
if (is_lambda) {
|
2012-10-14 23:58:59 +04:00
|
|
|
arg = rb_ary_new4(argc, argv);
|
|
|
|
}
|
|
|
|
else if (argc == 0) {
|
|
|
|
arg = Qnil;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
arg = argv[0];
|
|
|
|
}
|
|
|
|
|
2017-10-27 09:06:31 +03:00
|
|
|
blockarg = rb_vm_bh_to_procval(ec, block_handler);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2019-09-18 00:32:19 +03:00
|
|
|
frame_flag = VM_FRAME_MAGIC_IFUNC | VM_FRAME_FLAG_CFRAME | (me ? VM_FRAME_FLAG_BMETHOD : 0);
|
2019-10-04 22:51:57 +03:00
|
|
|
if (kw_splat) {
|
2020-02-21 18:17:31 +03:00
|
|
|
frame_flag |= VM_FRAME_FLAG_CFRAME_KW;
|
2019-09-18 00:32:19 +03:00
|
|
|
}
|
|
|
|
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_push_frame(ec, (const rb_iseq_t *)captured->code.ifunc,
|
2019-09-18 00:32:19 +03:00
|
|
|
frame_flag,
|
2016-07-28 14:02:30 +03:00
|
|
|
self,
|
|
|
|
VM_GUARDED_PREV_EP(captured->ep),
|
2018-11-09 04:13:20 +03:00
|
|
|
(VALUE)me,
|
2017-10-27 09:06:31 +03:00
|
|
|
0, ec->cfp->sp, 0, 0);
|
2019-08-26 08:25:53 +03:00
|
|
|
val = (*ifunc->func)(arg, (VALUE)ifunc->data, argc, argv, blockarg);
|
2017-10-27 09:06:31 +03:00
|
|
|
rb_vm_pop_frame(ec);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2023-02-03 18:14:42 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_yield_with_cfunc(rb_execution_context_t *ec, const struct rb_captured_block *captured, int argc, const VALUE *argv)
|
|
|
|
{
|
|
|
|
return vm_yield_with_cfunc(ec, captured, captured->self, argc, argv, 0, VM_BLOCK_HANDLER_NONE, NULL);
|
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
static VALUE
|
2019-09-03 19:32:42 +03:00
|
|
|
vm_yield_with_symbol(rb_execution_context_t *ec, VALUE symbol, int argc, const VALUE *argv, int kw_splat, VALUE block_handler)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
2019-09-04 00:54:37 +03:00
|
|
|
return rb_sym_proc_call(SYM2ID(symbol), argc, argv, kw_splat, rb_vm_bh_to_procval(ec, block_handler));
|
2016-07-28 14:02:30 +03:00
|
|
|
}
|
|
|
|
|
2015-12-22 14:20:12 +03:00
|
|
|
static inline int
|
|
|
|
vm_callee_setup_block_arg_arg0_splat(rb_control_frame_t *cfp, const rb_iseq_t *iseq, VALUE *argv, VALUE ary)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
long len = RARRAY_LEN(ary);
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
CHECK_VM_STACK_OVERFLOW(cfp, ISEQ_BODY(iseq)->param.lead_num);
|
2015-12-22 14:20:12 +03:00
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
for (i=0; i<len && i<ISEQ_BODY(iseq)->param.lead_num; i++) {
|
2015-12-22 14:20:12 +03:00
|
|
|
argv[i] = RARRAY_AREF(ary, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
|
|
|
vm_callee_setup_block_arg_arg0_check(VALUE *argv)
|
|
|
|
{
|
|
|
|
VALUE ary, arg0 = argv[0];
|
|
|
|
ary = rb_check_array_type(arg0);
|
2017-03-18 14:29:35 +03:00
|
|
|
#if 0
|
2015-12-22 14:20:12 +03:00
|
|
|
argv[0] = arg0;
|
2017-03-18 14:29:35 +03:00
|
|
|
#else
|
|
|
|
VM_ASSERT(argv[0] == arg0);
|
|
|
|
#endif
|
2015-12-22 14:20:12 +03:00
|
|
|
return ary;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
vm_callee_setup_block_arg(rb_execution_context_t *ec, struct rb_calling_info *calling, const struct rb_callinfo *ci, const rb_iseq_t *iseq, VALUE *argv, const enum arg_setup_type arg_setup_type)
|
2015-12-22 14:20:12 +03:00
|
|
|
{
|
2018-02-05 18:49:32 +03:00
|
|
|
if (rb_simple_iseq_p(iseq)) {
|
2017-10-27 09:06:31 +03:00
|
|
|
rb_control_frame_t *cfp = ec->cfp;
|
2015-12-22 14:20:12 +03:00
|
|
|
VALUE arg0;
|
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(cfp, calling, ci, ISEQ_BODY(iseq)->param.lead_num);
|
2019-09-05 13:29:25 +03:00
|
|
|
|
2015-12-22 14:20:12 +03:00
|
|
|
if (arg_setup_type == arg_setup_block &&
|
|
|
|
calling->argc == 1 &&
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->param.flags.has_lead &&
|
|
|
|
!ISEQ_BODY(iseq)->param.flags.ambiguous_param0 &&
|
2015-12-22 14:20:12 +03:00
|
|
|
!NIL_P(arg0 = vm_callee_setup_block_arg_arg0_check(argv))) {
|
|
|
|
calling->argc = vm_callee_setup_block_arg_arg0_splat(cfp, iseq, argv, arg0);
|
|
|
|
}
|
|
|
|
|
2022-03-23 22:19:48 +03:00
|
|
|
if (calling->argc != ISEQ_BODY(iseq)->param.lead_num) {
|
2015-12-22 14:20:12 +03:00
|
|
|
if (arg_setup_type == arg_setup_block) {
|
2022-03-23 22:19:48 +03:00
|
|
|
if (calling->argc < ISEQ_BODY(iseq)->param.lead_num) {
|
2015-12-22 14:20:12 +03:00
|
|
|
int i;
|
2022-03-23 22:19:48 +03:00
|
|
|
CHECK_VM_STACK_OVERFLOW(cfp, ISEQ_BODY(iseq)->param.lead_num);
|
|
|
|
for (i=calling->argc; i<ISEQ_BODY(iseq)->param.lead_num; i++) argv[i] = Qnil;
|
|
|
|
calling->argc = ISEQ_BODY(iseq)->param.lead_num; /* fill rest parameters */
|
2015-12-22 14:20:12 +03:00
|
|
|
}
|
2022-03-23 22:19:48 +03:00
|
|
|
else if (calling->argc > ISEQ_BODY(iseq)->param.lead_num) {
|
|
|
|
calling->argc = ISEQ_BODY(iseq)->param.lead_num; /* simply truncate arguments */
|
2015-12-22 14:20:12 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2022-03-23 22:19:48 +03:00
|
|
|
argument_arity_error(ec, iseq, calling->argc, ISEQ_BODY(iseq)->param.lead_num, ISEQ_BODY(iseq)->param.lead_num);
|
2015-12-22 14:20:12 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else {
|
2017-10-27 09:06:31 +03:00
|
|
|
return setup_parameters_complex(ec, iseq, calling, ci, argv, arg_setup_type);
|
2015-12-22 14:20:12 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
static int
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
vm_yield_setup_args(rb_execution_context_t *ec, const rb_iseq_t *iseq, const int argc, VALUE *argv, int flags, VALUE block_handler, enum arg_setup_type arg_setup_type)
|
2012-10-14 23:58:59 +04:00
|
|
|
{
|
2015-09-19 20:59:58 +03:00
|
|
|
struct rb_calling_info calling_entry, *calling;
|
|
|
|
|
|
|
|
calling = &calling_entry;
|
|
|
|
calling->argc = argc;
|
2016-07-28 14:02:30 +03:00
|
|
|
calling->block_handler = block_handler;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
calling->kw_splat = (flags & VM_CALL_KW_SPLAT) ? 1 : 0;
|
2019-03-18 08:25:47 +03:00
|
|
|
calling->recv = Qundef;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure sure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
calling->heap_argv = 0;
|
|
|
|
struct rb_callinfo dummy_ci = VM_CI_ON_STACK(0, flags, 0, 0);
|
2015-09-19 20:59:58 +03:00
|
|
|
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
return vm_callee_setup_block_arg(ec, calling, &dummy_ci, iseq, argv, arg_setup_type);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
/* ruby iseq -> ruby block */
|
2015-10-10 23:32:07 +03:00
|
|
|
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
This information is stored in rb_call_info_t at compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple method invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
static VALUE
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_invoke_iseq_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
2020-05-26 04:22:51 +03:00
|
|
|
bool is_lambda, VALUE block_handler)
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple method invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above changes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewritten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
{
|
2020-05-25 12:00:15 +03:00
|
|
|
const struct rb_captured_block *captured = VM_BH_TO_ISEQ_BLOCK(block_handler);
|
2017-02-16 12:15:26 +03:00
|
|
|
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
|
2022-03-23 22:19:48 +03:00
|
|
|
const int arg_size = ISEQ_BODY(iseq)->param.size;
|
2016-07-28 14:02:30 +03:00
|
|
|
VALUE * const rsp = GET_SP() - calling->argc;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
VALUE * const argv = rsp;
|
|
|
|
int opt_pc = vm_callee_setup_block_arg(ec, calling, ci, iseq, argv, is_lambda ? arg_setup_method : arg_setup_block);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
SET_SP(rsp);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_push_frame(ec, iseq,
|
2017-06-03 13:07:44 +03:00
|
|
|
VM_FRAME_MAGIC_BLOCK | (is_lambda ? VM_FRAME_FLAG_LAMBDA : 0),
|
2016-07-28 14:02:30 +03:00
|
|
|
captured->self,
|
|
|
|
VM_GUARDED_PREV_EP(captured->ep), 0,
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
|
2016-07-28 14:02:30 +03:00
|
|
|
rsp + arg_size,
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(iseq)->local_table_size - arg_size, ISEQ_BODY(iseq)->stack_max);
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
static VALUE
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_invoke_symbol_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
2020-05-26 04:22:51 +03:00
|
|
|
MAYBE_UNUSED(bool is_lambda), VALUE block_handler)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
VALUE symbol = VM_BH_TO_SYMBOL(block_handler);
|
Optimize symproc calls
Similar to the bmethod/send optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the receiver argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* symproc.(recv) ~5%
* symproc.(recv, *args) ~65% for args.length == 200
* symproc.(recv, *args, **kw) ~45% for args.length == 200
* symproc.(recv, **kw) ~30%
* symproc.(recv, kw: 1) ~100%
Note that empty argument splats do get slower with this approach,
by about 2-3%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* symproc.(*args)
* symproc.(*args, **kw)
This is because you cannot shift the receiver argument off
without first splatting the arg.
2023-04-02 21:06:13 +03:00
|
|
|
int flags = vm_ci_flag(ci);
|
|
|
|
|
|
|
|
if (UNLIKELY(!(flags & VM_CALL_ARGS_SIMPLE) &&
|
|
|
|
((calling->argc == 0) ||
|
|
|
|
(calling->argc == 1 && (flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT))) ||
|
|
|
|
(calling->argc == 2 && (flags & VM_CALL_ARGS_SPLAT) && (flags & VM_CALL_KW_SPLAT)) ||
|
|
|
|
((flags & VM_CALL_KWARG) && (vm_ci_kwarg(ci)->keyword_len == calling->argc))))) {
|
|
|
|
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV);
|
|
|
|
flags = 0;
|
|
|
|
if (UNLIKELY(calling->heap_argv)) {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
#if VM_ARGC_STACK_MAX < 0
|
Optimize symproc calls
Similar to the bmethod/send optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the receiver argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* symproc.(recv) ~5%
* symproc.(recv, *args) ~65% for args.length == 200
* symproc.(recv, *args, **kw) ~45% for args.length == 200
* symproc.(recv, **kw) ~30%
* symproc.(recv, kw: 1) ~100%
Note that empty argument splats do get slower with this approach,
by about 2-3%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* symproc.(*args)
* symproc.(*args, **kw)
This is because you cannot shift the receiver argument off
without first splatting the arg.
2023-04-02 21:06:13 +03:00
|
|
|
if (RARRAY_LEN(calling->heap_argv) < 1) {
|
|
|
|
rb_raise(rb_eArgError, "no receiver given");
|
|
|
|
}
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
#endif
|
Optimize symproc calls
Similar to the bmethod/send optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the receiver argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* symproc.(recv) ~5%
* symproc.(recv, *args) ~65% for args.length == 200
* symproc.(recv, *args, **kw) ~45% for args.length == 200
* symproc.(recv, **kw) ~30%
* symproc.(recv, kw: 1) ~100%
Note that empty argument splats do get slower with this approach,
by about 2-3%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* symproc.(*args)
* symproc.(*args, **kw)
This is because you cannot shift the receiver argument off
without first splatting the arg.
2023-04-02 21:06:13 +03:00
|
|
|
calling->recv = rb_ary_shift(calling->heap_argv);
|
|
|
|
// Modify stack to avoid cfp consistency error
|
|
|
|
reg_cfp->sp++;
|
|
|
|
reg_cfp->sp[-1] = reg_cfp->sp[-2];
|
|
|
|
reg_cfp->sp[-2] = calling->recv;
|
|
|
|
flags |= VM_CALL_ARGS_SPLAT;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (calling->argc < 1) {
|
|
|
|
rb_raise(rb_eArgError, "no receiver given");
|
|
|
|
}
|
|
|
|
calling->recv = TOPN(--calling->argc);
|
|
|
|
}
|
|
|
|
if (calling->kw_splat) {
|
|
|
|
flags |= VM_CALL_KW_SPLAT;
|
|
|
|
}
|
2020-05-27 16:36:12 +03:00
|
|
|
}
|
2020-05-28 06:33:53 +03:00
|
|
|
else {
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
if (calling->argc < 1) {
|
|
|
|
rb_raise(rb_eArgError, "no receiver given");
|
|
|
|
}
|
2020-05-28 06:33:53 +03:00
|
|
|
calling->recv = TOPN(--calling->argc);
|
2020-05-27 16:36:12 +03:00
|
|
|
}
|
Optimize symproc calls
Similar to the bmethod/send optimization, this avoids using
CALLER_ARG_SPLAT if not necessary. As long as the receiver argument
can be shifted off, other arguments are passed through as-is.
This optimizes the following types of calls:
* symproc.(recv) ~5%
* symproc.(recv, *args) ~65% for args.length == 200
* symproc.(recv, *args, **kw) ~45% for args.length == 200
* symproc.(recv, **kw) ~30%
* symproc.(recv, kw: 1) ~100%
Note that empty argument splats do get slower with this approach,
by about 2-3%. This is probably because iseq argument setup is
slower for empty argument splats than CALLER_SETUP_ARG is. Other
than non-empty argument splats, other argument splats are faster,
with the speedup depending on the number of arguments.
The following types of calls are not optimized:
* symproc.(*args)
* symproc.(*args, **kw)
This is because you cannot shift the receiver argument off
without first splatting the arg.
2023-04-02 21:06:13 +03:00
|
|
|
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
return vm_call_symbol(ec, reg_cfp, calling, ci, symbol, flags);
|
2016-07-28 14:02:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2017-10-27 09:06:31 +03:00
|
|
|
vm_invoke_ifunc_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
2020-05-26 04:22:51 +03:00
|
|
|
MAYBE_UNUSED(bool is_lambda), VALUE block_handler)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
|
|
|
VALUE val;
|
|
|
|
int argc;
|
2020-05-25 12:00:15 +03:00
|
|
|
const struct rb_captured_block *captured = VM_BH_TO_IFUNC_BLOCK(block_handler);
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
CALLER_SETUP_ARG(ec->cfp, calling, ci, ALLOW_HEAP_ARGV_KEEP_KWSPLAT);
|
2016-07-28 14:02:30 +03:00
|
|
|
argc = calling->argc;
|
Generalize cfunc large array splat fix to fix many additional cases raising SystemStackError
Originally, when 2e7bceb34ea858649e1f975a934ce1894d1f06a6 fixed cfuncs to no
longer use the VM stack for large array splats, it was thought to have fully
fixed Bug #4040, since the issue was fixed for methods defined in Ruby (iseqs)
back in Ruby 2.2.
After additional research, I determined that same issue affects almost all
types of method calls, not just iseq and cfunc calls. There were two main
types of remaining issues, important cases (where large array splat should
work) and pedantic cases (where large array splat raised SystemStackError
instead of ArgumentError).
Important cases:
```ruby
define_method(:a){|*a|}
a(*1380888.times)
def b(*a); end
send(:b, *1380888.times)
:b.to_proc.call(self, *1380888.times)
def d; yield(*1380888.times) end
d(&method(:b))
def self.method_missing(*a); end
not_a_method(*1380888.times)
```
Pedantic cases:
```ruby
def a; end
a(*1380888.times)
def b(_); end
b(*1380888.times)
def c(_=nil); end
c(*1380888.times)
c = Class.new do
attr_accessor :a
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
c = Struct.new(:a) do
alias b a=
end.new
c.a(*1380888.times)
c.b(*1380888.times)
```
This patch fixes all usage of CALLER_SETUP_ARG with splatting a large
number of arguments, and required similar fixes to use a temporary
hidden array in three other cases where the VM would use the VM stack
for handling a large number of arguments. However, it is possible
there may be additional cases where splatting a large number
of arguments still causes a SystemStackError.
This has a measurable performance impact, as it requires additional
checks for a large number of arguments in many additional cases.
This change is fairly invasive, as there were many different VM
functions that needed to be modified to support this. To avoid
too much API change, I modified struct rb_calling_info to add a
heap_argv member for storing the array, so I would not have to
thread it through many functions. This struct is always stack
allocated, which helps ensure GC doesn't collect it early.
Because of how invasive the changes are, and how rarely large
arrays are actually splatted in Ruby code, the existing test/spec
suites are not great at testing for correct behavior. To try to
find and fix all issues, I tested this in CI with
VM_ARGC_STACK_MAX to -1, ensuring that a temporary array is used
for all array splat method calls. This was very helpful in
finding breaking cases, especially ones involving flagged keyword
hashes.
Fixes [Bug #4040]
Co-authored-by: Jimmy Miller <jimmy.miller@shopify.com>
2023-03-07 02:58:58 +03:00
|
|
|
val = vm_yield_with_cfunc(ec, captured, captured->self, CALLING_ARGC(calling), calling->heap_argv ? RARRAY_CONST_PTR(calling->heap_argv) : STACK_ADDR_FROM_TOP(argc), calling->kw_splat, calling->block_handler, NULL);
|
2016-07-28 14:02:30 +03:00
|
|
|
POPN(argc); /* TODO: should put before C/yield? */
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_proc_to_block_handler(VALUE procval)
|
|
|
|
{
|
|
|
|
const struct rb_block *block = vm_proc_block(procval);
|
2012-10-14 23:58:59 +04:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
switch (vm_block_type(block)) {
|
|
|
|
case block_type_iseq:
|
|
|
|
return VM_BH_FROM_ISEQ_BLOCK(&block->as.captured);
|
|
|
|
case block_type_ifunc:
|
|
|
|
return VM_BH_FROM_IFUNC_BLOCK(&block->as.captured);
|
|
|
|
case block_type_symbol:
|
|
|
|
return VM_BH_FROM_SYMBOL(block->as.symbol);
|
|
|
|
case block_type_proc:
|
|
|
|
return VM_BH_FROM_PROC(block->as.proc);
|
2012-10-14 23:58:59 +04:00
|
|
|
}
|
2016-07-28 14:02:30 +03:00
|
|
|
VM_UNREACHABLE(vm_yield_with_proc);
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
|
2020-05-26 04:14:17 +03:00
|
|
|
static VALUE
|
|
|
|
vm_invoke_proc_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
2020-05-31 11:34:58 +03:00
|
|
|
bool is_lambda, VALUE block_handler)
|
2020-05-26 04:14:17 +03:00
|
|
|
{
|
2020-05-31 11:34:58 +03:00
|
|
|
while (vm_block_handler_type(block_handler) == block_handler_type_proc) {
|
|
|
|
VALUE proc = VM_BH_TO_PROC(block_handler);
|
|
|
|
is_lambda = block_proc_is_lambda(proc);
|
|
|
|
block_handler = vm_proc_to_block_handler(proc);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vm_invoke_block(ec, reg_cfp, calling, ci, is_lambda, block_handler);
|
2020-05-26 04:14:17 +03:00
|
|
|
}
|
|
|
|
|
2018-01-07 22:18:49 +03:00
|
|
|
static inline VALUE
|
|
|
|
vm_invoke_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
2020-05-26 04:14:17 +03:00
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
|
|
|
bool is_lambda, VALUE block_handler)
|
2016-07-28 14:02:30 +03:00
|
|
|
{
|
2020-05-29 04:47:09 +03:00
|
|
|
VALUE (*func)(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
|
|
|
|
struct rb_calling_info *calling, const struct rb_callinfo *ci,
|
|
|
|
bool is_lambda, VALUE block_handler);
|
2020-05-26 05:24:15 +03:00
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
switch (vm_block_handler_type(block_handler)) {
|
2020-05-26 05:24:15 +03:00
|
|
|
case block_handler_type_iseq: func = vm_invoke_iseq_block; break;
|
|
|
|
case block_handler_type_ifunc: func = vm_invoke_ifunc_block; break;
|
|
|
|
case block_handler_type_proc: func = vm_invoke_proc_block; break;
|
|
|
|
case block_handler_type_symbol: func = vm_invoke_symbol_block; break;
|
2020-06-21 10:34:31 +03:00
|
|
|
default: rb_bug("vm_invoke_block: unreachable");
|
2020-05-26 05:24:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return func(ec, reg_cfp, calling, ci, is_lambda, block_handler);
|
* insns.def (send, invokesuper, invokeblock, opt_*), vm_core.h:
use only a `ci' (rb_call_info_t) parameter instead of using
parameters such as `op_id', 'op_argc', `blockiseq' and flag.
These information are stored in rb_call_info_t at the compile
time.
This technique simplifies parameter passings at related
function calls (~10% speedups for simple mehtod invocation at
my machine).
`rb_call_info_t' also has new function pointer variable `call'.
This `call' variable enables to customize method (block)
invocation process for each place. However, it always call
`vm_call_general()' at this changes.
`rb_call_info_t' also has temporary variables for method
(block) invocation.
* vm_core.h, compile.c, insns.def: introduce VM_CALL_ARGS_SKIP_SETUP
VM_CALL macro. This flag indicates that this call can skip
caller_setup (block arg and splat arg).
* compile.c: catch up above changes.
* iseq.c: catch up above changes (especially for TS_CALLINFO).
* tool/instruction.rb: catch up above chagnes.
* vm_insnhelper.c, vm_insnhelper.h: ditto. Macros and functions
parameters are changed.
* vm_eval.c (vm_call0): ditto (it will be rewriten soon).
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-10-14 20:59:05 +04:00
|
|
|
}
|
2013-08-20 21:41:13 +04:00
|
|
|
|
|
|
|
static VALUE
|
2015-07-22 00:28:43 +03:00
|
|
|
vm_make_proc_with_iseq(const rb_iseq_t *blockiseq)
|
2013-08-20 21:41:13 +04:00
|
|
|
{
|
2017-10-26 11:41:34 +03:00
|
|
|
const rb_execution_context_t *ec = GET_EC();
|
|
|
|
const rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(ec, ec->cfp);
|
2016-07-28 14:02:30 +03:00
|
|
|
struct rb_captured_block *captured;
|
2013-08-20 21:41:13 +04:00
|
|
|
|
|
|
|
if (cfp == 0) {
|
2013-09-22 15:57:50 +04:00
|
|
|
rb_bug("vm_make_proc_with_iseq: unreachable");
|
2013-08-20 21:41:13 +04:00
|
|
|
}
|
|
|
|
|
2016-07-28 14:02:30 +03:00
|
|
|
captured = VM_CFP_TO_CAPTURED_BLOCK(cfp);
|
|
|
|
captured->code.iseq = blockiseq;
|
2013-08-20 21:41:13 +04:00
|
|
|
|
2017-10-26 11:41:34 +03:00
|
|
|
return rb_vm_make_proc(ec, captured, rb_cProc);
|
2013-08-20 21:41:13 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2014-06-20 10:59:28 +04:00
|
|
|
vm_once_exec(VALUE iseq)
|
2013-08-20 21:41:13 +04:00
|
|
|
{
|
2014-06-20 10:59:28 +04:00
|
|
|
VALUE proc = vm_make_proc_with_iseq((rb_iseq_t *)iseq);
|
2013-08-20 21:41:13 +04:00
|
|
|
return rb_proc_call_with_block(proc, 0, 0, Qnil);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_once_clear(VALUE data)
|
|
|
|
{
|
|
|
|
union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)data;
|
|
|
|
is->once.running_thread = NULL;
|
|
|
|
return Qnil;
|
|
|
|
}
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction checks the availability of the corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
|
2015-06-02 22:15:29 +03:00
|
|
|
/* defined insn */
|
|
|
|
|
2021-03-17 19:10:42 +03:00
|
|
|
static bool
|
2015-06-02 23:03:54 +03:00
|
|
|
check_respond_to_missing(VALUE obj, VALUE v)
|
2015-06-02 22:49:22 +03:00
|
|
|
{
|
|
|
|
VALUE args[2];
|
|
|
|
VALUE r;
|
|
|
|
|
|
|
|
args[0] = obj; args[1] = Qfalse;
|
|
|
|
r = rb_check_funcall(v, idRespond_to_missing, 2, args);
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(r) && RTEST(r)) {
|
2021-03-17 19:10:42 +03:00
|
|
|
return true;
|
2015-06-02 22:49:22 +03:00
|
|
|
}
|
|
|
|
else {
|
2021-03-17 19:10:42 +03:00
|
|
|
return false;
|
2015-06-02 22:49:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-17 19:10:42 +03:00
|
|
|
static bool
|
2021-03-17 01:16:51 +03:00
|
|
|
vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v)
|
2015-06-02 22:15:29 +03:00
|
|
|
{
|
|
|
|
VALUE klass;
|
|
|
|
enum defined_type type = (enum defined_type)op_type;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case DEFINED_IVAR:
|
2021-03-17 01:25:37 +03:00
|
|
|
return rb_ivar_defined(GET_SELF(), SYM2ID(obj));
|
2015-06-02 22:15:29 +03:00
|
|
|
break;
|
|
|
|
case DEFINED_GVAR:
|
2021-03-17 01:25:37 +03:00
|
|
|
return rb_gvar_defined(SYM2ID(obj));
|
2015-06-02 22:15:29 +03:00
|
|
|
break;
|
|
|
|
case DEFINED_CVAR: {
|
2019-04-05 11:15:21 +03:00
|
|
|
const rb_cref_t *cref = vm_get_cref(GET_EP());
|
2020-03-28 01:08:52 +03:00
|
|
|
klass = vm_get_cvar_base(cref, GET_CFP(), 0);
|
2021-03-17 01:25:37 +03:00
|
|
|
return rb_cvar_defined(klass, SYM2ID(obj));
|
2015-06-02 22:15:29 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case DEFINED_CONST:
|
2019-11-06 09:47:32 +03:00
|
|
|
case DEFINED_CONST_FROM: {
|
|
|
|
bool allow_nil = type == DEFINED_CONST;
|
2015-06-02 22:15:29 +03:00
|
|
|
klass = v;
|
2021-03-17 01:25:37 +03:00
|
|
|
return vm_get_ev_const(ec, klass, SYM2ID(obj), allow_nil, true);
|
2015-06-02 22:15:29 +03:00
|
|
|
break;
|
2019-11-06 09:47:32 +03:00
|
|
|
}
|
2015-06-02 22:15:29 +03:00
|
|
|
case DEFINED_FUNC:
|
|
|
|
klass = CLASS_OF(v);
|
2021-03-17 01:25:37 +03:00
|
|
|
return rb_ec_obj_respond_to(ec, v, SYM2ID(obj), TRUE);
|
2015-06-02 22:15:29 +03:00
|
|
|
break;
|
|
|
|
case DEFINED_METHOD:{
|
|
|
|
VALUE klass = CLASS_OF(v);
|
2020-06-03 17:05:55 +03:00
|
|
|
const rb_method_entry_t *me = rb_method_entry_with_refinements(klass, SYM2ID(obj), NULL);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2015-06-02 22:15:29 +03:00
|
|
|
if (me) {
|
2015-06-06 13:19:48 +03:00
|
|
|
switch (METHOD_ENTRY_VISI(me)) {
|
2015-06-03 04:39:16 +03:00
|
|
|
case METHOD_VISI_PRIVATE:
|
|
|
|
break;
|
|
|
|
case METHOD_VISI_PROTECTED:
|
2020-06-02 12:55:06 +03:00
|
|
|
if (!rb_obj_is_kind_of(GET_SELF(), rb_class_real(me->defined_class))) {
|
2015-06-03 04:39:16 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case METHOD_VISI_PUBLIC:
|
2021-03-17 19:10:42 +03:00
|
|
|
return true;
|
2015-06-03 04:39:16 +03:00
|
|
|
break;
|
|
|
|
default:
|
2015-06-06 13:19:48 +03:00
|
|
|
rb_bug("vm_defined: unreachable: %u", (unsigned int)METHOD_ENTRY_VISI(me));
|
2015-06-02 22:15:29 +03:00
|
|
|
}
|
|
|
|
}
|
2015-06-02 22:36:43 +03:00
|
|
|
else {
|
2021-03-17 01:25:37 +03:00
|
|
|
return check_respond_to_missing(obj, v);
|
2015-06-02 22:15:29 +03:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case DEFINED_YIELD:
|
2016-07-28 14:02:30 +03:00
|
|
|
if (GET_BLOCK_HANDLER() != VM_BLOCK_HANDLER_NONE) {
|
2021-03-17 19:10:42 +03:00
|
|
|
return true;
|
2015-06-02 22:15:29 +03:00
|
|
|
}
|
|
|
|
break;
|
2015-07-09 05:10:51 +03:00
|
|
|
case DEFINED_ZSUPER:
|
|
|
|
{
|
|
|
|
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(GET_CFP());
|
|
|
|
|
|
|
|
if (me) {
|
|
|
|
VALUE klass = vm_search_normal_superclass(me->defined_class);
|
2023-04-26 16:05:30 +03:00
|
|
|
if (!klass) return false;
|
2023-04-26 06:22:10 +03:00
|
|
|
|
2015-07-09 05:10:51 +03:00
|
|
|
ID id = me->def->original_id;
|
|
|
|
|
2021-03-17 01:25:37 +03:00
|
|
|
return rb_method_boundp(klass, id, 0);
|
2015-06-02 22:15:29 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2023-06-27 17:14:10 +03:00
|
|
|
case DEFINED_REF:
|
|
|
|
return RTEST(vm_backref_defined(ec, GET_LEP(), FIX2INT(obj)));
|
2015-06-02 22:15:29 +03:00
|
|
|
default:
|
|
|
|
rb_bug("unimplemented defined? type (VM)");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2021-03-17 19:10:42 +03:00
|
|
|
return false;
|
2015-06-02 22:15:29 +03:00
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
2021-05-11 23:06:07 +03:00
|
|
|
bool
|
|
|
|
rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v)
|
|
|
|
{
|
|
|
|
return vm_defined(ec, reg_cfp, op_type, obj, v);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static const VALUE *
|
|
|
|
vm_get_ep(const VALUE *const reg_ep, rb_num_t lv)
|
|
|
|
{
|
|
|
|
rb_num_t i;
|
|
|
|
const VALUE *ep = reg_ep;
|
|
|
|
for (i = 0; i < lv; i++) {
|
|
|
|
ep = GET_PREV_EP(ep);
|
|
|
|
}
|
|
|
|
return ep;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_get_special_object(const VALUE *const reg_ep,
|
|
|
|
enum vm_special_object_type type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
2017-04-18 16:14:05 +03:00
|
|
|
case VM_SPECIAL_OBJECT_VMCORE:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return rb_mRubyVMFrozenCore;
|
2017-04-18 16:14:05 +03:00
|
|
|
case VM_SPECIAL_OBJECT_CBASE:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return vm_get_cbase(reg_ep);
|
2017-04-18 16:14:05 +03:00
|
|
|
case VM_SPECIAL_OBJECT_CONST_BASE:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return vm_get_const_base(reg_ep);
|
2017-04-18 16:14:05 +03:00
|
|
|
default:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_bug("putspecialobject insn: unknown value_type %d", type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_concat_array(VALUE ary1, VALUE ary2st)
|
|
|
|
{
|
|
|
|
const VALUE ary2 = ary2st;
|
2018-01-27 12:27:47 +03:00
|
|
|
VALUE tmp1 = rb_check_to_array(ary1);
|
|
|
|
VALUE tmp2 = rb_check_to_array(ary2);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
|
|
|
if (NIL_P(tmp1)) {
|
|
|
|
tmp1 = rb_ary_new3(1, ary1);
|
|
|
|
}
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:g, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
if (tmp1 == ary1) {
|
|
|
|
tmp1 = rb_ary_dup(ary1);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into a single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
|
|
|
if (NIL_P(tmp2)) {
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:g, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
return rb_ary_push(tmp1, ary2);
|
|
|
|
} else {
|
|
|
|
return rb_ary_concat(tmp1, tmp2);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:g, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:g, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
static VALUE
|
|
|
|
vm_concat_to_array(VALUE ary1, VALUE ary2st)
|
|
|
|
{
|
|
|
|
/* ary1 must be a newly created array */
|
|
|
|
const VALUE ary2 = ary2st;
|
|
|
|
VALUE tmp2 = rb_check_to_array(ary2);
|
|
|
|
|
|
|
|
if (NIL_P(tmp2)) {
|
|
|
|
return rb_ary_push(ary1, ary2);
|
|
|
|
} else {
|
|
|
|
return rb_ary_concat(ary1, tmp2);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-15 19:54:26 +03:00
|
|
|
// YJIT implementation is using the C function
|
|
|
|
// and needs to call a non-static function
|
|
|
|
VALUE
|
|
|
|
rb_vm_concat_array(VALUE ary1, VALUE ary2st)
|
|
|
|
{
|
|
|
|
return vm_concat_array(ary1, ary2st);
|
|
|
|
}
|
|
|
|
|
2024-01-26 00:45:58 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_concat_to_array(VALUE ary1, VALUE ary2st)
|
|
|
|
{
|
|
|
|
return vm_concat_to_array(ary1, ary2st);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into a single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_splat_array(VALUE flag, VALUE ary)
|
|
|
|
{
|
2018-01-27 12:27:47 +03:00
|
|
|
VALUE tmp = rb_check_to_array(ary);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
if (NIL_P(tmp)) {
|
|
|
|
return rb_ary_new3(1, ary);
|
|
|
|
}
|
|
|
|
else if (RTEST(flag)) {
|
|
|
|
return rb_ary_dup(tmp);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-15 19:54:26 +03:00
|
|
|
// YJIT implementation is using the C function
|
|
|
|
// and needs to call a non-static function
|
2021-07-14 22:16:56 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_splat_array(VALUE flag, VALUE ary)
|
|
|
|
{
|
|
|
|
return vm_splat_array(flag, ary);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into a single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
2017-11-16 09:10:31 +03:00
|
|
|
vm_check_match(rb_execution_context_t *ec, VALUE target, VALUE pattern, rb_num_t flag)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
|
|
|
enum vm_check_match_type type = ((int)flag) & VM_CHECKMATCH_TYPE_MASK;
|
|
|
|
|
|
|
|
if (flag & VM_CHECKMATCH_ARRAY) {
|
|
|
|
long i;
|
|
|
|
const long n = RARRAY_LEN(pattern);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
VALUE v = RARRAY_AREF(pattern, i);
|
2017-11-16 09:10:31 +03:00
|
|
|
VALUE c = check_match(ec, v, target, type);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
|
|
|
if (RTEST(c)) {
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Qfalse;
|
|
|
|
}
|
|
|
|
else {
|
2017-11-16 09:10:31 +03:00
|
|
|
return check_match(ec, pattern, target, type);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-11 06:08:29 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_check_match(rb_execution_context_t *ec, VALUE target, VALUE pattern, rb_num_t flag)
|
|
|
|
{
|
|
|
|
return vm_check_match(ec, target, pattern, flag);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into a single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_check_keyword(lindex_t bits, lindex_t idx, const VALUE *ep)
|
|
|
|
{
|
|
|
|
const VALUE kw_bits = *(ep - bits);
|
|
|
|
|
|
|
|
if (FIXNUM_P(kw_bits)) {
|
2018-01-19 07:23:59 +03:00
|
|
|
unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
|
2018-01-19 06:09:24 +03:00
|
|
|
if ((idx < KW_SPECIFIED_BITS_MAX) && (b & (0x01 << idx)))
|
|
|
|
return Qfalse;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
VM_ASSERT(RB_TYPE_P(kw_bits, T_HASH));
|
2018-01-19 05:36:32 +03:00
|
|
|
if (rb_hash_has_key(kw_bits, INT2FIX(idx))) return Qfalse;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2018-01-19 05:36:32 +03:00
|
|
|
return Qtrue;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2017-11-07 11:19:25 +03:00
|
|
|
vm_dtrace(rb_event_flag_t flag, rb_execution_context_t *ec)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
|
|
|
if (RUBY_DTRACE_METHOD_ENTRY_ENABLED() ||
|
|
|
|
RUBY_DTRACE_METHOD_RETURN_ENABLED() ||
|
|
|
|
RUBY_DTRACE_CMETHOD_ENTRY_ENABLED() ||
|
|
|
|
RUBY_DTRACE_CMETHOD_RETURN_ENABLED()) {
|
|
|
|
|
|
|
|
switch (flag) {
|
|
|
|
case RUBY_EVENT_CALL:
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_METHOD_ENTRY_HOOK(ec, 0, 0);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return;
|
|
|
|
case RUBY_EVENT_C_CALL:
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_CMETHOD_ENTRY_HOOK(ec, 0, 0);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return;
|
|
|
|
case RUBY_EVENT_RETURN:
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_METHOD_RETURN_HOOK(ec, 0, 0);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return;
|
|
|
|
case RUBY_EVENT_C_RETURN:
|
2017-11-07 11:19:25 +03:00
|
|
|
RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, 0, 0);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_const_get_under(ID id, rb_num_t flags, VALUE cbase)
|
|
|
|
{
|
2022-06-21 01:51:05 +03:00
|
|
|
if (!rb_const_defined_at(cbase, id)) {
|
|
|
|
return 0;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (VM_DEFINECLASS_SCOPED_P(flags)) {
|
2022-06-21 01:51:05 +03:00
|
|
|
return rb_public_const_get_at(cbase, id);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-06-21 01:51:05 +03:00
|
|
|
return rb_const_get_at(cbase, id);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_check_if_class(ID id, rb_num_t flags, VALUE super, VALUE klass)
|
|
|
|
{
|
|
|
|
if (!RB_TYPE_P(klass, T_CLASS)) {
|
2019-07-04 10:54:34 +03:00
|
|
|
return 0;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (VM_DEFINECLASS_HAS_SUPERCLASS_P(flags)) {
|
|
|
|
VALUE tmp = rb_class_real(RCLASS_SUPER(klass));
|
|
|
|
|
|
|
|
if (tmp != super) {
|
|
|
|
rb_raise(rb_eTypeError,
|
|
|
|
"superclass mismatch for class %"PRIsVALUE"",
|
|
|
|
rb_id2str(id));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return klass;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return klass;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_check_if_module(ID id, VALUE mod)
|
|
|
|
{
|
|
|
|
if (!RB_TYPE_P(mod, T_MODULE)) {
|
2019-07-04 10:54:34 +03:00
|
|
|
return 0;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return mod;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-09 19:08:42 +03:00
|
|
|
static VALUE
|
|
|
|
declare_under(ID id, VALUE cbase, VALUE c)
|
|
|
|
{
|
|
|
|
rb_set_class_path_string(c, cbase, rb_id2str(id));
|
|
|
|
rb_const_set(cbase, id, c);
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_declare_class(ID id, rb_num_t flags, VALUE cbase, VALUE super)
|
|
|
|
{
|
|
|
|
/* new class declaration */
|
|
|
|
VALUE s = VM_DEFINECLASS_HAS_SUPERCLASS_P(flags) ? super : rb_cObject;
|
2019-10-09 19:08:42 +03:00
|
|
|
VALUE c = declare_under(id, cbase, rb_define_class_id(id, s));
|
2020-11-12 23:15:30 +03:00
|
|
|
rb_define_alloc_func(c, rb_get_alloc_func(c));
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_class_inherited(s, c);
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_declare_module(ID id, VALUE cbase)
|
|
|
|
{
|
|
|
|
/* new module declaration */
|
2020-11-26 01:05:06 +03:00
|
|
|
return declare_under(id, cbase, rb_module_new());
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
|
2019-07-04 10:54:34 +03:00
|
|
|
NORETURN(static void unmatched_redefinition(const char *type, VALUE cbase, ID id, VALUE old));

/*
 * Raise a TypeError saying that the constant `id` "is not a <type>".
 *
 * When rb_const_source_location_at(cbase, id) yields a non-nil location,
 * its first two entries are appended to the message as
 * "<entry0>:<entry1>: previous definition of <name> was here".
 * `old` is accepted but not consulted.  This function never returns.
 */
static void
unmatched_redefinition(const char *type, VALUE cbase, ID id, VALUE old)
{
    VALUE name = rb_id2str(id);
    VALUE message = rb_sprintf("%"PRIsVALUE" is not a %s",
                               name, type);
    VALUE location = rb_const_source_location_at(cbase, id);

    if (!NIL_P(location)) {
        VALUE loc_first = rb_ary_entry(location, 0);
        VALUE loc_second = rb_ary_entry(location, 1);

        rb_str_catf(message, "\n%"PRIsVALUE":%"PRIsVALUE":"
                    " previous definition of %"PRIsVALUE" was here",
                    loc_first, loc_second, name);
    }

    rb_exc_raise(rb_exc_new_str(rb_eTypeError, message));
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_define_class(ID id, rb_num_t flags, VALUE cbase, VALUE super)
|
|
|
|
{
|
|
|
|
VALUE klass;
|
|
|
|
|
|
|
|
if (VM_DEFINECLASS_HAS_SUPERCLASS_P(flags) && !RB_TYPE_P(super, T_CLASS)) {
|
|
|
|
rb_raise(rb_eTypeError,
|
2020-07-18 18:18:39 +03:00
|
|
|
"superclass must be an instance of Class (given an instance of %"PRIsVALUE")",
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_obj_class(super));
|
|
|
|
}
|
|
|
|
|
|
|
|
vm_check_if_namespace(cbase);
|
|
|
|
|
|
|
|
/* find klass */
|
|
|
|
rb_autoload_load(cbase, id);
|
|
|
|
if ((klass = vm_const_get_under(id, flags, cbase)) != 0) {
|
2019-07-04 10:54:34 +03:00
|
|
|
if (!vm_check_if_class(id, flags, super, klass))
|
|
|
|
unmatched_redefinition("class", cbase, id, klass);
|
|
|
|
return klass;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return vm_declare_class(id, flags, cbase, super);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_define_module(ID id, rb_num_t flags, VALUE cbase)
|
|
|
|
{
|
|
|
|
VALUE mod;
|
|
|
|
|
|
|
|
vm_check_if_namespace(cbase);
|
|
|
|
if ((mod = vm_const_get_under(id, flags, cbase)) != 0) {
|
2019-07-04 10:54:34 +03:00
|
|
|
if (!vm_check_if_module(id, mod))
|
|
|
|
unmatched_redefinition("module", cbase, id, mod);
|
|
|
|
return mod;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return vm_declare_module(id, cbase);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_find_or_create_class_by_id(ID id,
|
|
|
|
rb_num_t flags,
|
|
|
|
VALUE cbase,
|
|
|
|
VALUE super)
|
|
|
|
{
|
|
|
|
rb_vm_defineclass_type_t type = VM_DEFINECLASS_TYPE(flags);
|
|
|
|
|
|
|
|
switch (type) {
|
2017-04-18 16:14:08 +03:00
|
|
|
case VM_DEFINECLASS_TYPE_CLASS:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* classdef returns class scope value */
|
|
|
|
return vm_define_class(id, flags, cbase, super);
|
|
|
|
|
2017-04-18 16:14:08 +03:00
|
|
|
case VM_DEFINECLASS_TYPE_SINGLETON_CLASS:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* classdef returns class scope value */
|
|
|
|
return rb_singleton_class(cbase);
|
|
|
|
|
2017-04-18 16:14:08 +03:00
|
|
|
case VM_DEFINECLASS_TYPE_MODULE:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* classdef returns class scope value */
|
|
|
|
return vm_define_module(id, flags, cbase);
|
|
|
|
|
2017-04-18 16:14:08 +03:00
|
|
|
default:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_bug("unknown defineclass type: %d", (int)type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-05 11:15:11 +03:00
|
|
|
static rb_method_visibility_t
|
|
|
|
vm_scope_visibility_get(const rb_execution_context_t *ec)
|
|
|
|
{
|
|
|
|
const rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(ec, ec->cfp);
|
|
|
|
|
|
|
|
if (!vm_env_cref_by_cref(cfp->ep)) {
|
2019-04-05 11:15:21 +03:00
|
|
|
return METHOD_VISI_PUBLIC;
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
else {
|
2019-04-05 11:15:21 +03:00
|
|
|
return CREF_SCOPE_VISI(vm_ec_cref(ec))->method_visi;
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
vm_scope_module_func_check(const rb_execution_context_t *ec)
|
|
|
|
{
|
|
|
|
const rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(ec, ec->cfp);
|
|
|
|
|
|
|
|
if (!vm_env_cref_by_cref(cfp->ep)) {
|
2019-04-05 11:15:21 +03:00
|
|
|
return FALSE;
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
else {
|
2019-04-05 11:15:21 +03:00
|
|
|
return CREF_SCOPE_VISI(vm_ec_cref(ec))->module_func;
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
vm_define_method(const rb_execution_context_t *ec, VALUE obj, ID id, VALUE iseqval, int is_singleton)
|
|
|
|
{
|
|
|
|
VALUE klass;
|
|
|
|
rb_method_visibility_t visi;
|
|
|
|
rb_cref_t *cref = vm_ec_cref(ec);
|
|
|
|
|
2021-12-03 02:53:39 +03:00
|
|
|
if (is_singleton) {
|
2019-04-05 11:15:21 +03:00
|
|
|
klass = rb_singleton_class(obj); /* class and frozen checked in this API */
|
|
|
|
visi = METHOD_VISI_PUBLIC;
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
2021-12-03 02:53:39 +03:00
|
|
|
else {
|
|
|
|
klass = CREF_CLASS_FOR_DEFINITION(cref);
|
|
|
|
visi = vm_scope_visibility_get(ec);
|
|
|
|
}
|
2019-04-05 11:15:11 +03:00
|
|
|
|
|
|
|
if (NIL_P(klass)) {
|
2019-04-05 11:15:21 +03:00
|
|
|
rb_raise(rb_eTypeError, "no class/module to add method");
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
rb_add_method_iseq(klass, id, (const rb_iseq_t *)iseqval, cref, visi);
|
2022-11-22 23:28:14 +03:00
|
|
|
// Set max_iv_count on klasses based on number of ivar sets that are in the initialize method
|
2023-10-24 22:38:50 +03:00
|
|
|
if (id == idInitialize && klass != rb_cObject && RB_TYPE_P(klass, T_CLASS) && (rb_get_alloc_func(klass) == rb_class_allocate_instance)) {
|
2022-11-22 23:28:14 +03:00
|
|
|
|
|
|
|
RCLASS_EXT(klass)->max_iv_count = rb_estimate_iv_count(klass, (const rb_iseq_t *)iseqval);
|
|
|
|
}
|
2019-04-05 11:15:11 +03:00
|
|
|
|
|
|
|
if (!is_singleton && vm_scope_module_func_check(ec)) {
|
2019-04-05 11:15:21 +03:00
|
|
|
klass = rb_singleton_class(klass);
|
|
|
|
rb_add_method_iseq(klass, id, (const rb_iseq_t *)iseqval, cref, METHOD_VISI_PUBLIC);
|
2019-04-05 11:15:11 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-26 03:59:37 +03:00
|
|
|
static VALUE
|
2020-12-14 23:40:38 +03:00
|
|
|
vm_invokeblock_i(struct rb_execution_context_struct *ec,
|
|
|
|
struct rb_control_frame_struct *reg_cfp,
|
|
|
|
struct rb_calling_info *calling)
|
2018-12-26 03:59:37 +03:00
|
|
|
{
|
2023-07-31 10:04:16 +03:00
|
|
|
const struct rb_callinfo *ci = calling->cd->ci;
|
2018-12-26 03:59:37 +03:00
|
|
|
VALUE block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
|
|
|
|
|
|
|
|
if (block_handler == VM_BLOCK_HANDLER_NONE) {
|
2018-12-26 03:59:40 +03:00
|
|
|
rb_vm_localjump_error("no block given (yield)", Qnil, 0);
|
2018-12-26 03:59:37 +03:00
|
|
|
}
|
|
|
|
else {
|
2020-05-26 04:14:17 +03:00
|
|
|
return vm_invoke_block(ec, GET_CFP(), calling, ci, false, block_handler);
|
2018-12-26 03:59:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-17 09:46:36 +03:00
|
|
|
/* Selects how vm_sendish() resolves and dispatches a call. */
enum method_explorer_type {
    /* resolve through vm_search_method_fastpath() on the receiver's class */
    mexp_search_method,

    /* dispatch to the current frame's block through vm_invokeblock_i() */
    mexp_search_invokeblock,

    /* resolve through vm_search_super_method() */
    mexp_search_super,
};
|
2020-12-14 23:40:38 +03:00
|
|
|
|
2023-03-07 09:03:39 +03:00
|
|
|
static inline VALUE
|
2018-12-26 03:59:37 +03:00
|
|
|
vm_sendish(
|
|
|
|
struct rb_execution_context_struct *ec,
|
|
|
|
struct rb_control_frame_struct *reg_cfp,
|
2019-07-31 04:36:05 +03:00
|
|
|
struct rb_call_data *cd,
|
2018-12-26 03:59:37 +03:00
|
|
|
VALUE block_handler,
|
2020-12-19 10:30:09 +03:00
|
|
|
enum method_explorer_type method_explorer
|
|
|
|
) {
|
2021-01-17 12:07:33 +03:00
|
|
|
VALUE val = Qundef;
|
2020-12-17 09:46:36 +03:00
|
|
|
const struct rb_callinfo *ci = cd->ci;
|
|
|
|
const struct rb_callcache *cc;
|
|
|
|
int argc = vm_ci_argc(ci);
|
2018-12-26 03:59:37 +03:00
|
|
|
VALUE recv = TOPN(argc);
|
2020-12-14 23:40:38 +03:00
|
|
|
struct rb_calling_info calling = {
|
|
|
|
.block_handler = block_handler,
|
|
|
|
.kw_splat = IS_ARGS_KW_SPLAT(ci) > 0,
|
|
|
|
.recv = recv,
|
|
|
|
.argc = argc,
|
2023-07-31 10:04:16 +03:00
|
|
|
.cd = cd,
|
2020-12-14 23:40:38 +03:00
|
|
|
};
|
2018-12-26 03:59:37 +03:00
|
|
|
|
2020-12-17 09:46:36 +03:00
|
|
|
switch (method_explorer) {
|
|
|
|
case mexp_search_method:
|
|
|
|
calling.cc = cc = vm_search_method_fastpath((VALUE)reg_cfp->iseq, cd, CLASS_OF(recv));
|
|
|
|
val = vm_cc_call(cc)(ec, GET_CFP(), &calling);
|
|
|
|
break;
|
|
|
|
case mexp_search_super:
|
|
|
|
calling.cc = cc = vm_search_super_method(reg_cfp, cd, recv);
|
|
|
|
val = vm_cc_call(cc)(ec, GET_CFP(), &calling);
|
|
|
|
break;
|
|
|
|
case mexp_search_invokeblock:
|
|
|
|
val = vm_invokeblock_i(ec, GET_CFP(), &calling);
|
|
|
|
break;
|
|
|
|
}
|
2023-03-14 23:39:06 +03:00
|
|
|
return val;
|
2018-12-26 03:59:37 +03:00
|
|
|
}
|
|
|
|
|
2023-07-24 23:51:46 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_send(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, CALL_DATA cd, ISEQ blockiseq)
|
|
|
|
{
|
2023-09-15 00:18:45 +03:00
|
|
|
stack_check(ec);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
|
|
|
struct rb_forwarding_call_data adjusted_cd;
|
|
|
|
struct rb_callinfo adjusted_ci;
|
|
|
|
|
2024-06-04 00:20:04 +03:00
|
|
|
VALUE bh;
|
2024-06-04 01:48:13 +03:00
|
|
|
VALUE val;
|
2024-06-04 00:20:04 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (vm_ci_flag(cd->ci) & VM_CALL_FORWARDING) {
|
|
|
|
bh = vm_caller_setup_fwd_args(GET_EC(), GET_CFP(), cd, blockiseq, false, &adjusted_cd, &adjusted_ci);
|
2024-06-04 00:20:04 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
val = vm_sendish(ec, GET_CFP(), &adjusted_cd.cd, bh, mexp_search_method);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (cd->cc != adjusted_cd.cd.cc && vm_cc_markable(adjusted_cd.cd.cc)) {
|
|
|
|
RB_OBJ_WRITE(GET_ISEQ(), &cd->cc, adjusted_cd.cd.cc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, false);
|
|
|
|
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_method);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm fo `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
|
2023-07-24 23:51:46 +03:00
|
|
|
VM_EXEC(ec, val);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_vm_opt_send_without_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, CALL_DATA cd)
|
|
|
|
{
|
2023-09-15 00:18:45 +03:00
|
|
|
stack_check(ec);
|
2023-07-24 23:51:46 +03:00
|
|
|
VALUE bh = VM_BLOCK_HANDLER_NONE;
|
|
|
|
VALUE val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_method);
|
|
|
|
VM_EXEC(ec, val);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_vm_invokesuper(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, CALL_DATA cd, ISEQ blockiseq)
|
|
|
|
{
|
2023-09-15 00:18:45 +03:00
|
|
|
stack_check(ec);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
struct rb_forwarding_call_data adjusted_cd;
|
|
|
|
struct rb_callinfo adjusted_ci;
|
|
|
|
|
2024-06-04 00:20:04 +03:00
|
|
|
VALUE bh;
|
2024-06-04 01:48:13 +03:00
|
|
|
VALUE val;
|
2024-06-04 00:20:04 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (vm_ci_flag(cd->ci) & VM_CALL_FORWARDING) {
|
|
|
|
bh = vm_caller_setup_fwd_args(GET_EC(), GET_CFP(), cd, blockiseq, true, &adjusted_cd, &adjusted_ci);
|
2024-06-04 00:20:04 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
val = vm_sendish(ec, GET_CFP(), &adjusted_cd.cd, bh, mexp_search_super);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (cd->cc != adjusted_cd.cd.cc && vm_cc_markable(adjusted_cd.cd.cc)) {
|
|
|
|
RB_OBJ_WRITE(GET_ISEQ(), &cd->cc, adjusted_cd.cd.cc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, true);
|
|
|
|
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_super);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
|
2023-07-24 23:51:46 +03:00
|
|
|
VM_EXEC(ec, val);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_vm_invokeblock(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, CALL_DATA cd)
|
|
|
|
{
|
2023-09-15 00:18:45 +03:00
|
|
|
stack_check(ec);
|
2023-07-24 23:51:46 +03:00
|
|
|
VALUE bh = VM_BLOCK_HANDLER_NONE;
|
|
|
|
VALUE val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_invokeblock);
|
|
|
|
VM_EXEC(ec, val);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, 0-9, and class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
/* object.c -- to_s functions for nil/true/false; referenced by vm_objtostring() below */
|
|
|
|
VALUE rb_nil_to_s(VALUE);
|
|
|
|
VALUE rb_true_to_s(VALUE);
|
|
|
|
VALUE rb_false_to_s(VALUE);
|
|
|
|
/* numeric.c -- Integer to_s functions; vm_objtostring() below checks for rb_int_to_s and calls rb_fix_to_s */
|
|
|
|
VALUE rb_int_to_s(int argc, VALUE *argv, VALUE x);
|
|
|
|
VALUE rb_fix_to_s(VALUE);
|
|
|
|
/* variable.c -- Module/Class name helpers; vm_objtostring() below tries rb_mod_name first and falls back to rb_mod_to_s */
|
|
|
|
VALUE rb_mod_to_s(VALUE);
|
|
|
|
VALUE rb_mod_name(VALUE);
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
|
|
|
|
{
|
2022-09-08 15:07:43 +03:00
|
|
|
int type = TYPE(recv);
|
|
|
|
if (type == T_STRING) {
|
|
|
|
return recv;
|
|
|
|
}
|
|
|
|
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
const struct rb_callcache *cc = vm_search_method((VALUE)iseq, cd, recv);
|
|
|
|
|
2022-09-08 15:07:43 +03:00
|
|
|
switch (type) {
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
case T_SYMBOL:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_sym_to_s)) {
|
|
|
|
// rb_sym_to_s() allocates a mutable string, but since we are only
|
|
|
|
// going to use this string for interpolation, it's fine to use the
|
|
|
|
// frozen string.
|
|
|
|
return rb_sym2str(recv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case T_MODULE:
|
|
|
|
case T_CLASS:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_mod_to_s)) {
|
|
|
|
// rb_mod_to_s() allocates a mutable string, but since we are only
|
|
|
|
// going to use this string for interpolation, it's fine to use the
|
|
|
|
// frozen string.
|
|
|
|
VALUE val = rb_mod_name(recv);
|
2022-06-29 15:59:39 +03:00
|
|
|
if (NIL_P(val)) {
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
val = rb_mod_to_s(recv);
|
|
|
|
}
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case T_NIL:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_nil_to_s)) {
|
|
|
|
return rb_nil_to_s(recv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case T_TRUE:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_true_to_s)) {
|
|
|
|
return rb_true_to_s(recv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case T_FALSE:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_false_to_s)) {
|
|
|
|
return rb_false_to_s(recv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case T_FIXNUM:
|
|
|
|
if (check_cfunc(vm_cc_cme(cc), rb_int_to_s)) {
|
|
|
|
return rb_fix_to_s(recv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
|
2024-07-29 13:15:02 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_ary_freeze(VALUE ary, int bop, ID id)
|
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(bop, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
return ary;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-05 13:31:24 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_hash_freeze(VALUE hash, int bop, ID id)
|
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(bop, HASH_REDEFINED_OP_FLAG)) {
|
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-27 04:10:02 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_str_freeze(VALUE str, int bop, ID id)
|
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(bop, STRING_REDEFINED_OP_FLAG)) {
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
else {
|
2018-09-12 06:39:36 +03:00
|
|
|
return Qundef;
|
2018-06-27 04:10:02 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/*
 * This macro must be defined in order to use OPTIMIZED_CMP (what a design!).
 * NOTE(review): presumably OPTIMIZED_CMP's expansion refers to `id_cmp`
 * by name -- confirm against its definition.  Here it is aliased to idCmp.
 */
|
|
|
|
#define id_cmp idCmp
|
|
|
|
|
|
|
|
static VALUE
|
2021-10-01 01:18:14 +03:00
|
|
|
vm_opt_newarray_max(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_MAX, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
if (num == 0) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
else {
|
2018-01-11 11:26:21 +03:00
|
|
|
VALUE result = *ptr;
|
2018-11-13 03:40:52 +03:00
|
|
|
rb_snum_t i = num - 1;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
while (i-- > 0) {
|
2018-01-11 11:26:21 +03:00
|
|
|
const VALUE v = *++ptr;
|
2022-11-23 05:16:11 +03:00
|
|
|
if (OPTIMIZED_CMP(v, result) > 0) {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
result = v;
|
|
|
|
}
|
|
|
|
}
|
2018-01-11 11:26:21 +03:00
|
|
|
return result;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2021-09-30 22:35:27 +03:00
|
|
|
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idMax, 0, NULL, RB_NO_KEYWORDS);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-12 18:19:24 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_newarray_max(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
|
|
|
{
|
|
|
|
return vm_opt_newarray_max(ec, num, ptr);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
2021-10-01 01:18:14 +03:00
|
|
|
vm_opt_newarray_min(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_MIN, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
if (num == 0) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
else {
|
2018-01-11 11:26:21 +03:00
|
|
|
VALUE result = *ptr;
|
2018-11-13 03:40:52 +03:00
|
|
|
rb_snum_t i = num - 1;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
while (i-- > 0) {
|
2018-01-11 11:26:21 +03:00
|
|
|
const VALUE v = *++ptr;
|
2022-11-23 05:16:11 +03:00
|
|
|
if (OPTIMIZED_CMP(v, result) < 0) {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
result = v;
|
|
|
|
}
|
|
|
|
}
|
2018-01-11 11:26:21 +03:00
|
|
|
return result;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2021-09-30 22:35:27 +03:00
|
|
|
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idMin, 0, NULL, RB_NO_KEYWORDS);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-09 01:31:33 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_newarray_min(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
|
|
|
{
|
|
|
|
return vm_opt_newarray_min(ec, num, ptr);
|
|
|
|
}
|
|
|
|
|
Emit special instruction for array literal + .(hash|min|max)
This commit introduces a new instruction `opt_newarray_send` which is
used when there is an array literal followed by either the `hash`,
`min`, or `max` method.
```
[a, b, c].hash
```
Will emit an `opt_newarray_send` instruction. This instruction falls
back to a method call if the "interested" method has been monkey
patched.
Here are some examples of the instructions generated:
```
$ ./miniruby --dump=insns -e '[@a, @b].max'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :max
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].min'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :min
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].hash'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :hash
0009 leave
```
[Feature #18897] [ruby-core:109147]
Co-authored-by: John Hawthorn <jhawthorn@github.com>
2022-06-07 03:27:56 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_newarray_hash(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
|
|
|
{
|
|
|
|
// If Array#hash is _not_ monkeypatched, use the optimized call
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_HASH, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_ary_hash_values(num, ptr);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idHash, 0, NULL, RB_NO_KEYWORDS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-18 23:16:14 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_newarray_hash(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
|
|
|
|
{
|
|
|
|
return vm_opt_newarray_hash(ec, num, ptr);
|
|
|
|
}
|
|
|
|
|
2024-08-08 15:11:37 +03:00
|
|
|
VALUE rb_setup_fake_ary(struct RArray *fake_ary, const VALUE *list, long len);
|
Introduce a specialized instruction for Array#pack
Instructions for this code:
```ruby
# frozen_string_literal: true
[a].pack("C")
```
Before this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 newarray 1
0005 putobject "C"
0007 opt_send_without_block <calldata!mid:pack, argc:1, ARGS_SIMPLE>
0009 leave
```
After this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 putobject "C"
0005 opt_newarray_send 2, :pack
0008 leave
```
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2024-05-23 21:23:26 +03:00
|
|
|
VALUE rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer);
|
|
|
|
|
Expand opt_newarray_send to support Array#pack with buffer keyword arg
Use an enum for the method arg instead of needing to add an id
that doesn't map to an actual method name.
$ ruby --dump=insns -e 'b = "x"; [v].pack("E*", buffer: b)'
before:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 newarray 1
0009 putchilledstring "E*"
0011 getlocal_WC_0 b@0
0013 opt_send_without_block <calldata!mid:pack, argc:2, kw:[#<Symbol:0x000000000023110c>], KWARG>
0015 leave
```
after:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 putchilledstring "E*"
0009 getlocal b@0, 0
0012 opt_newarray_send 3, 5
0015 leave
```
2024-07-20 20:03:02 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_newarray_pack_buffer(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt, VALUE buffer)
|
Introduce a specialize instruction for Array#pack
Instructions for this code:
```ruby
# frozen_string_literal: true
[a].pack("C")
```
Before this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 newarray 1
0005 putobject "C"
0007 opt_send_without_block <calldata!mid:pack, argc:1, ARGS_SIMPLE>
0009 leave
```
After this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 putobject "C"
0005 opt_newarray_send 2, :pack
0008 leave
```
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2024-05-23 21:23:26 +03:00
|
|
|
{
|
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_PACK, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
struct RArray fake_ary;
|
2024-08-08 15:11:37 +03:00
|
|
|
VALUE ary = rb_setup_fake_ary(&fake_ary, ptr, num);
|
Expand opt_newarray_send to support Array#pack with buffer keyword arg
Use an enum for the method arg instead of needing to add an id
that doesn't map to an actual method name.
$ ruby --dump=insns -e 'b = "x"; [v].pack("E*", buffer: b)'
before:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 newarray 1
0009 putchilledstring "E*"
0011 getlocal_WC_0 b@0
0013 opt_send_without_block <calldata!mid:pack, argc:2, kw:[#<Symbol:0x000000000023110c>], KWARG>
0015 leave
```
after:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 putchilledstring "E*"
0009 getlocal b@0, 0
0012 opt_newarray_send 3, 5
0015 leave
```
2024-07-20 20:03:02 +03:00
|
|
|
return rb_ec_pack_ary(ec, ary, fmt, (UNDEF_P(buffer) ? Qnil : buffer));
|
Introduce a specialize instruction for Array#pack
Instructions for this code:
```ruby
# frozen_string_literal: true
[a].pack("C")
```
Before this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 newarray 1
0005 putobject "C"
0007 opt_send_without_block <calldata!mid:pack, argc:1, ARGS_SIMPLE>
0009 leave
```
After this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 putobject "C"
0005 opt_newarray_send 2, :pack
0008 leave
```
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2024-05-23 21:23:26 +03:00
|
|
|
}
|
|
|
|
else {
|
Expand opt_newarray_send to support Array#pack with buffer keyword arg
Use an enum for the method arg instead of needing to add an id
that doesn't map to an actual method name.
$ ruby --dump=insns -e 'b = "x"; [v].pack("E*", buffer: b)'
before:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 newarray 1
0009 putchilledstring "E*"
0011 getlocal_WC_0 b@0
0013 opt_send_without_block <calldata!mid:pack, argc:2, kw:[#<Symbol:0x000000000023110c>], KWARG>
0015 leave
```
after:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 putchilledstring "E*"
0009 getlocal b@0, 0
0012 opt_newarray_send 3, 5
0015 leave
```
2024-07-20 20:03:02 +03:00
|
|
|
// The opt_newarray_send insn drops the keyword args so we need to rebuild them.
|
|
|
|
// Setup an array with room for keyword hash.
|
|
|
|
VALUE args[2];
|
|
|
|
args[0] = fmt;
|
|
|
|
int kw_splat = RB_NO_KEYWORDS;
|
|
|
|
int argc = 1;
|
|
|
|
|
|
|
|
if (!UNDEF_P(buffer)) {
|
|
|
|
args[1] = rb_hash_new_with_size(1);
|
|
|
|
rb_hash_aset(args[1], ID2SYM(idBuffer), buffer);
|
|
|
|
kw_splat = RB_PASS_KEYWORDS;
|
|
|
|
argc++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idPack, argc, args, kw_splat);
|
Introduce a specialize instruction for Array#pack
Instructions for this code:
```ruby
# frozen_string_literal: true
[a].pack("C")
```
Before this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 newarray 1
0005 putobject "C"
0007 opt_send_without_block <calldata!mid:pack, argc:1, ARGS_SIMPLE>
0009 leave
```
After this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 putobject "C"
0005 opt_newarray_send 2, :pack
0008 leave
```
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2024-05-23 21:23:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Expand opt_newarray_send to support Array#pack with buffer keyword arg
Use an enum for the method arg instead of needing to add an id
that doesn't map to an actual method name.
$ ruby --dump=insns -e 'b = "x"; [v].pack("E*", buffer: b)'
before:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 newarray 1
0009 putchilledstring "E*"
0011 getlocal_WC_0 b@0
0013 opt_send_without_block <calldata!mid:pack, argc:2, kw:[#<Symbol:0x000000000023110c>], KWARG>
0015 leave
```
after:
```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring "x" ( 1)[Li]
0002 setlocal_WC_0 b@0
0004 putself
0005 opt_send_without_block <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 putchilledstring "E*"
0009 getlocal b@0, 0
0012 opt_newarray_send 3, 5
0015 leave
```
2024-07-20 20:03:02 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_newarray_pack_buffer(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt, VALUE buffer)
|
|
|
|
{
|
|
|
|
return vm_opt_newarray_pack_buffer(ec, num, ptr, fmt, buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_vm_opt_newarray_pack(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt)
|
|
|
|
{
|
|
|
|
return vm_opt_newarray_pack_buffer(ec, num, ptr, fmt, Qundef);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
#undef id_cmp
|
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
static void
|
|
|
|
vm_track_constant_cache(ID id, void *ic)
|
2022-03-31 18:04:25 +03:00
|
|
|
{
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
struct rb_id_table *const_cache = GET_VM()->constant_cache;
|
|
|
|
VALUE lookup_result;
|
|
|
|
st_table *ics;
|
2022-03-31 18:04:25 +03:00
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
if (rb_id_table_lookup(const_cache, id, &lookup_result)) {
|
|
|
|
ics = (st_table *)lookup_result;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ics = st_init_numtable();
|
|
|
|
rb_id_table_insert(const_cache, id, (VALUE)ics);
|
2022-03-31 18:04:25 +03:00
|
|
|
}
|
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
st_insert(ics, (st_data_t) ic, (st_data_t) Qtrue);
|
2022-03-31 18:04:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longe need disassemly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
vm_ic_track_const_chain(rb_control_frame_t *cfp, IC ic, const ID *segments)
|
2022-03-31 18:04:25 +03:00
|
|
|
{
|
2022-04-08 06:29:02 +03:00
|
|
|
RB_VM_LOCK_ENTER();
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longe need disassemly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
|
|
|
|
for (int i = 0; segments[i]; i++) {
|
|
|
|
ID id = segments[i];
|
|
|
|
if (id == idNULL) continue;
|
|
|
|
vm_track_constant_cache(id, ic);
|
2022-04-08 06:29:02 +03:00
|
|
|
}
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longe need disassemly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
|
2022-04-08 06:29:02 +03:00
|
|
|
RB_VM_LOCK_LEAVE();
|
2022-03-31 18:04:25 +03:00
|
|
|
}
|
|
|
|
|
2023-03-07 10:15:30 +03:00
|
|
|
// For RJIT inlining
|
2021-01-05 00:09:01 +03:00
|
|
|
static inline bool
|
2022-03-31 18:04:25 +03:00
|
|
|
vm_inlined_ic_hit_p(VALUE flags, VALUE value, const rb_cref_t *ic_cref, const VALUE *reg_ep)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
2022-03-31 18:04:25 +03:00
|
|
|
if ((flags & IMEMO_CONST_CACHE_SHAREABLE) || rb_ractor_main_p()) {
|
2022-10-01 09:58:47 +03:00
|
|
|
VM_ASSERT(ractor_incidental_shareable_p(flags & IMEMO_CONST_CACHE_SHAREABLE, value));
|
2021-01-04 12:08:25 +03:00
|
|
|
|
2021-01-05 00:09:01 +03:00
|
|
|
return (ic_cref == NULL || // no need to check CREF
|
|
|
|
ic_cref == vm_get_cref(reg_ep));
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2021-01-05 00:09:01 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
vm_ic_hit_p(const struct iseq_inline_constant_cache_entry *ice, const VALUE *reg_ep)
|
|
|
|
{
|
|
|
|
VM_ASSERT(IMEMO_TYPE_P(ice, imemo_constcache));
|
2022-03-31 18:04:25 +03:00
|
|
|
return vm_inlined_ic_hit_p(ice->flags, ice->value, ice->ic_cref, reg_ep);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
|
2021-10-18 18:30:18 +03:00
|
|
|
// YJIT needs this function to never allocate and never raise
|
2021-09-22 01:16:23 +03:00
|
|
|
bool
|
|
|
|
rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep)
|
|
|
|
{
|
|
|
|
return ic->entry && vm_ic_hit_p(ic->entry, reg_ep);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static void
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longe need disassemly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
vm_ic_update(const rb_iseq_t *iseq, IC ic, VALUE val, const VALUE *reg_ep, const VALUE *pc)
|
2021-01-04 12:08:25 +03:00
|
|
|
{
|
2022-03-31 18:04:25 +03:00
|
|
|
if (ruby_vm_const_missing_count > 0) {
|
|
|
|
ruby_vm_const_missing_count = 0;
|
|
|
|
ic->entry = NULL;
|
|
|
|
return;
|
|
|
|
}
|
2021-01-04 12:08:25 +03:00
|
|
|
|
2024-02-20 23:58:10 +03:00
|
|
|
struct iseq_inline_constant_cache_entry *ice = IMEMO_NEW(struct iseq_inline_constant_cache_entry, imemo_constcache, 0);
|
2021-01-04 12:08:25 +03:00
|
|
|
RB_OBJ_WRITE(ice, &ice->value, val);
|
|
|
|
ice->ic_cref = vm_get_const_key_cref(reg_ep);
|
|
|
|
if (rb_ractor_shareable_p(val)) ice->flags |= IMEMO_CONST_CACHE_SHAREABLE;
|
|
|
|
RB_OBJ_WRITE(iseq, &ic->entry, ice);
|
2022-12-24 12:13:40 +03:00
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longe need disassemly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
RUBY_ASSERT(pc >= ISEQ_BODY(iseq)->iseq_encoded);
|
|
|
|
unsigned pos = (unsigned)(pc - ISEQ_BODY(iseq)->iseq_encoded);
|
|
|
|
rb_yjit_constant_ic_update(iseq, ic, pos);
|
2023-03-07 10:17:25 +03:00
|
|
|
rb_rjit_constant_ic_update(iseq, ic, pos);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
|
2023-10-13 18:52:23 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_getconstant_path(rb_execution_context_t *ec, rb_control_frame_t *const reg_cfp, IC ic)
|
|
|
|
{
|
|
|
|
VALUE val;
|
|
|
|
const ID *segments = ic->segments;
|
|
|
|
struct iseq_inline_constant_cache_entry *ice = ic->entry;
|
|
|
|
if (ice && vm_ic_hit_p(ice, GET_EP())) {
|
|
|
|
val = ice->value;
|
|
|
|
|
|
|
|
VM_ASSERT(val == vm_get_ev_const_chain(ec, segments));
|
2024-01-07 18:50:41 +03:00
|
|
|
}
|
|
|
|
else {
|
2023-10-13 18:52:23 +03:00
|
|
|
ruby_vm_constant_cache_misses++;
|
|
|
|
val = vm_get_ev_const_chain(ec, segments);
|
|
|
|
vm_ic_track_const_chain(GET_CFP(), ic, segments);
|
|
|
|
// Undo the PC increment to get the address to this instruction
|
|
|
|
// INSN_ATTR(width) == 2
|
|
|
|
vm_ic_update(GET_ISEQ(), ic, val, GET_EP(), GET_PC() - 2);
|
|
|
|
}
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
2018-03-19 21:21:54 +03:00
|
|
|
vm_once_dispatch(rb_execution_context_t *ec, ISEQ iseq, ISE is)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
2017-11-07 09:14:00 +03:00
|
|
|
rb_thread_t *th = rb_ec_thread_ptr(ec);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_thread_t *const RUNNING_THREAD_ONCE_DONE = (rb_thread_t *)(0x1);
|
|
|
|
|
2017-04-19 18:14:03 +03:00
|
|
|
again:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
if (is->once.running_thread == RUNNING_THREAD_ONCE_DONE) {
|
|
|
|
return is->once.value;
|
|
|
|
}
|
|
|
|
else if (is->once.running_thread == NULL) {
|
2017-04-18 14:06:58 +03:00
|
|
|
VALUE val;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
is->once.running_thread = th;
|
2018-03-19 21:21:54 +03:00
|
|
|
val = rb_ensure(vm_once_exec, (VALUE)iseq, vm_once_clear, (VALUE)is);
|
|
|
|
RB_OBJ_WRITE(ec->cfp->iseq, &is->once.value, val);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* is->once.running_thread is cleared by vm_once_clear() */
|
|
|
|
is->once.running_thread = RUNNING_THREAD_ONCE_DONE; /* success */
|
2017-04-18 14:06:58 +03:00
|
|
|
return val;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (is->once.running_thread == th) {
|
|
|
|
/* recursive once */
|
|
|
|
return vm_once_exec((VALUE)iseq);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* waiting for finish */
|
2017-11-07 09:14:00 +03:00
|
|
|
RUBY_VM_CHECK_INTS(ec);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
rb_thread_schedule();
|
2017-04-19 18:14:03 +03:00
|
|
|
goto again;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static OFFSET
|
|
|
|
vm_case_dispatch(CDHASH hash, OFFSET else_offset, VALUE key)
|
|
|
|
{
|
|
|
|
switch (OBJ_BUILTIN_TYPE(key)) {
|
2017-04-18 16:14:08 +03:00
|
|
|
case -1:
|
|
|
|
case T_FLOAT:
|
|
|
|
case T_SYMBOL:
|
|
|
|
case T_BIGNUM:
|
|
|
|
case T_STRING:
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
if (BASIC_OP_UNREDEFINED_P(BOP_EQQ,
|
|
|
|
SYMBOL_REDEFINED_OP_FLAG |
|
|
|
|
INTEGER_REDEFINED_OP_FLAG |
|
|
|
|
FLOAT_REDEFINED_OP_FLAG |
|
|
|
|
NIL_REDEFINED_OP_FLAG |
|
|
|
|
TRUE_REDEFINED_OP_FLAG |
|
|
|
|
FALSE_REDEFINED_OP_FLAG |
|
|
|
|
STRING_REDEFINED_OP_FLAG)) {
|
|
|
|
st_data_t val;
|
|
|
|
if (RB_FLOAT_TYPE_P(key)) {
|
|
|
|
double kval = RFLOAT_VALUE(key);
|
|
|
|
if (!isinf(kval) && modf(kval, &kval) == 0.0) {
|
|
|
|
key = FIXABLE(kval) ? LONG2FIX((long)kval) : rb_dbl2big(kval);
|
|
|
|
}
|
|
|
|
}
|
2018-10-31 01:12:12 +03:00
|
|
|
if (rb_hash_stlike_lookup(hash, key, &val)) {
|
2018-09-12 04:55:00 +03:00
|
|
|
return FIX2LONG((VALUE)val);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return else_offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-23 04:43:39 +03:00
|
|
|
NORETURN(static void
|
2017-10-27 08:33:33 +03:00
|
|
|
vm_stack_consistency_error(const rb_execution_context_t *ec,
|
2017-06-23 04:43:39 +03:00
|
|
|
const rb_control_frame_t *,
|
|
|
|
const VALUE *));
|
|
|
|
static void
|
2017-10-27 08:33:33 +03:00
|
|
|
vm_stack_consistency_error(const rb_execution_context_t *ec,
|
2017-06-23 04:43:39 +03:00
|
|
|
const rb_control_frame_t *cfp,
|
|
|
|
const VALUE *bp)
|
|
|
|
{
|
2017-10-27 08:33:33 +03:00
|
|
|
const ptrdiff_t nsp = VM_SP_CNT(ec, cfp->sp);
|
|
|
|
const ptrdiff_t nbp = VM_SP_CNT(ec, bp);
|
2017-06-23 04:43:39 +03:00
|
|
|
static const char stack_consistency_error[] =
|
|
|
|
"Stack consistency error (sp: %"PRIdPTRDIFF", bp: %"PRIdPTRDIFF")";
|
2017-06-30 04:56:53 +03:00
|
|
|
#if defined RUBY_DEVEL
|
|
|
|
VALUE mesg = rb_sprintf(stack_consistency_error, nsp, nbp);
|
|
|
|
rb_str_cat_cstr(mesg, "\n");
|
|
|
|
rb_str_append(mesg, rb_iseq_disasm(cfp->iseq));
|
|
|
|
rb_exc_fatal(rb_exc_new3(rb_eFatal, mesg));
|
|
|
|
#else
|
2017-06-23 04:43:39 +03:00
|
|
|
rb_bug(stack_consistency_error, nsp, nbp);
|
2017-06-30 04:56:53 +03:00
|
|
|
#endif
|
2017-06-23 04:43:39 +03:00
|
|
|
}
|
|
|
|
|
2017-12-11 23:30:37 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_plus(VALUE recv, VALUE obj)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
2017-12-11 23:30:37 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_PLUS, INTEGER_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_fix_plus_fix(recv, obj);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
2017-12-11 23:30:37 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2017-04-19 10:27:03 +03:00
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
2017-12-11 23:30:37 +03:00
|
|
|
return Qundef;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2017-04-19 10:27:03 +03:00
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
2017-12-11 23:30:37 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
|
2017-04-19 10:27:03 +03:00
|
|
|
}
|
2017-12-11 23:30:37 +03:00
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cString &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
|
2019-08-06 14:59:41 +03:00
|
|
|
return rb_str_opt_plus(recv, obj);
|
2017-04-19 10:27:03 +03:00
|
|
|
}
|
2017-12-11 23:30:37 +03:00
|
|
|
else if (RBASIC_CLASS(recv) == rb_cArray &&
|
2019-08-06 06:56:18 +03:00
|
|
|
RBASIC_CLASS(obj) == rb_cArray &&
|
2017-12-11 23:30:37 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_ary_plus(recv, obj);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_minus(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MINUS, INTEGER_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_fix_minus_fix(recv, obj);
|
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
|
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MINUS, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) - RFLOAT_VALUE(obj));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_mult(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MULT, INTEGER_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_fix_mul_fix(recv, obj);
|
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
|
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MULT, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_div(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_DIV, INTEGER_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
return (FIX2LONG(obj) == 0) ? Qundef : rb_fix_div_fix(recv, obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
|
2018-11-12 06:26:44 +03:00
|
|
|
return rb_flo_div_flo(recv, obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_DIV, FLOAT_REDEFINED_OP_FLAG)) {
|
2018-11-12 06:26:44 +03:00
|
|
|
return rb_flo_div_flo(recv, obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_mod(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MOD, INTEGER_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
return (FIX2LONG(obj) == 0) ? Qundef : rb_fix_mod_fix(recv, obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
|
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MOD, FLOAT_REDEFINED_OP_FLAG)) {
|
|
|
|
return DBL2NUM(ruby_float_mod(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj)));
|
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_opt_neq(const rb_iseq_t *iseq, CALL_DATA cd, CALL_DATA cd_eq, VALUE recv, VALUE obj)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
if (vm_method_cfunc_is(iseq, cd, recv, rb_obj_not_equal)) {
|
2020-05-29 11:42:23 +03:00
|
|
|
VALUE val = opt_equality(iseq, recv, obj, cd_eq);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
|
2022-11-15 07:24:08 +03:00
|
|
|
if (!UNDEF_P(val)) {
|
2022-01-01 09:41:00 +03:00
|
|
|
return RBOOL(!RTEST(val));
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_lt(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LT, INTEGER_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL((SIGNED_VALUE)recv < (SIGNED_VALUE)obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LT, FLOAT_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
CHECK_CMP_NAN(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) < RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_le(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LE, INTEGER_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL((SIGNED_VALUE)recv <= (SIGNED_VALUE)obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LE, FLOAT_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
CHECK_CMP_NAN(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) <= RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_gt(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GT, INTEGER_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL((SIGNED_VALUE)recv > (SIGNED_VALUE)obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GT, FLOAT_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
CHECK_CMP_NAN(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) > RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_ge(VALUE recv, VALUE obj)
|
|
|
|
{
|
2017-12-11 23:30:37 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GE, INTEGER_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL((SIGNED_VALUE)recv >= (SIGNED_VALUE)obj);
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (FLONUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else if (SPECIAL_CONST_P(recv) || SPECIAL_CONST_P(obj)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cFloat &&
|
|
|
|
RBASIC_CLASS(obj) == rb_cFloat &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_GE, FLOAT_REDEFINED_OP_FLAG)) {
|
2017-04-19 10:27:03 +03:00
|
|
|
CHECK_CMP_NAN(RFLOAT_VALUE(recv), RFLOAT_VALUE(obj));
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RFLOAT_VALUE(recv) >= RFLOAT_VALUE(obj));
|
2017-12-11 23:30:37 +03:00
|
|
|
}
|
|
|
|
else {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-19 10:27:03 +03:00
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_ltlt(VALUE recv, VALUE obj)
|
|
|
|
{
|
|
|
|
if (SPECIAL_CONST_P(recv)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LTLT, STRING_REDEFINED_OP_FLAG)) {
|
2022-07-06 13:31:54 +03:00
|
|
|
if (LIKELY(RB_TYPE_P(obj, T_STRING))) {
|
|
|
|
return rb_str_buf_append(recv, obj);
|
2022-07-27 12:42:27 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-07-06 13:31:54 +03:00
|
|
|
return rb_str_concat(recv, obj);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cArray &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_LTLT, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_ary_push(recv, obj);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 05:38:45 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_and(VALUE recv, VALUE obj)
|
|
|
|
{
|
2022-03-07 22:08:15 +03:00
|
|
|
// If recv and obj are both fixnums, then the bottom tag bit
|
|
|
|
// will be 1 on both. 1 & 1 == 1, so the result value will also
|
|
|
|
// be a fixnum. If either side is *not* a fixnum, then the tag bit
|
|
|
|
// will be 0, and we return Qundef.
|
|
|
|
VALUE ret = ((SIGNED_VALUE) recv) & ((SIGNED_VALUE) obj);
|
|
|
|
|
|
|
|
if (FIXNUM_P(ret) &&
|
2018-09-26 05:38:45 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_AND, INTEGER_REDEFINED_OP_FLAG)) {
|
2022-03-07 22:08:15 +03:00
|
|
|
return ret;
|
2018-09-26 05:38:45 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_or(VALUE recv, VALUE obj)
|
|
|
|
{
|
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_OR, INTEGER_REDEFINED_OP_FLAG)) {
|
2019-11-19 07:16:04 +03:00
|
|
|
return recv | obj;
|
2018-09-26 05:38:45 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_aref(VALUE recv, VALUE obj)
|
|
|
|
{
|
|
|
|
if (SPECIAL_CONST_P(recv)) {
|
2019-11-19 07:16:04 +03:00
|
|
|
if (FIXNUM_2_P(recv, obj) &&
|
2019-06-01 07:34:55 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_AREF, INTEGER_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_fix_aref(recv, obj);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cArray &&
|
2017-10-22 03:19:12 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_AREF, ARRAY_REDEFINED_OP_FLAG)) {
|
2018-02-12 18:25:58 +03:00
|
|
|
if (FIXNUM_P(obj)) {
|
|
|
|
return rb_ary_entry_internal(recv, FIX2LONG(obj));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_ary_aref1(recv, obj);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cHash &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_hash_aref(recv, obj);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_aset(VALUE recv, VALUE obj, VALUE set)
|
|
|
|
{
|
|
|
|
if (SPECIAL_CONST_P(recv)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cArray &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_ASET, ARRAY_REDEFINED_OP_FLAG) &&
|
|
|
|
FIXNUM_P(obj)) {
|
|
|
|
rb_ary_store(recv, FIX2LONG(obj), set);
|
|
|
|
return set;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cHash &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG)) {
|
|
|
|
rb_hash_aset(recv, obj, set);
|
|
|
|
return set;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_aref_with(VALUE recv, VALUE key)
|
|
|
|
{
|
|
|
|
if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_AREF, HASH_REDEFINED_OP_FLAG) &&
|
2022-07-29 02:41:46 +03:00
|
|
|
rb_hash_compare_by_id_p(recv) == Qfalse &&
|
|
|
|
!FL_TEST(recv, RHASH_PROC_DEFAULT)) {
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
return rb_hash_aref(recv, key);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-26 17:38:59 +03:00
|
|
|
VALUE
|
|
|
|
rb_vm_opt_aref_with(VALUE recv, VALUE key)
|
|
|
|
{
|
|
|
|
return vm_opt_aref_with(recv, key);
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible.  That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_aset_with(VALUE recv, VALUE key, VALUE val)
|
|
|
|
{
|
|
|
|
if (!SPECIAL_CONST_P(recv) && RBASIC_CLASS(recv) == rb_cHash &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_ASET, HASH_REDEFINED_OP_FLAG) &&
|
|
|
|
rb_hash_compare_by_id_p(recv) == Qfalse) {
|
|
|
|
return rb_hash_aset(recv, key, val);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_length(VALUE recv, int bop)
|
|
|
|
{
|
|
|
|
if (SPECIAL_CONST_P(recv)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(bop, STRING_REDEFINED_OP_FLAG)) {
|
|
|
|
if (bop == BOP_EMPTY_P) {
|
|
|
|
return LONG2NUM(RSTRING_LEN(recv));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return rb_str_length(recv);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cArray &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(bop, ARRAY_REDEFINED_OP_FLAG)) {
|
|
|
|
return LONG2NUM(RARRAY_LEN(recv));
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cHash &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(bop, HASH_REDEFINED_OP_FLAG)) {
|
|
|
|
return INT2FIX(RHASH_SIZE(recv));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_empty_p(VALUE recv)
|
|
|
|
{
|
|
|
|
switch (vm_opt_length(recv, BOP_EMPTY_P)) {
|
2017-04-18 16:14:08 +03:00
|
|
|
case Qundef: return Qundef;
|
|
|
|
case INT2FIX(0): return Qtrue;
|
|
|
|
default: return Qfalse;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-02 17:25:38 +03:00
|
|
|
/* Prototype for rb_false(); vm_opt_nil_p() below passes it to
 * vm_method_cfunc_is() as the C function to compare against. */
VALUE rb_false(VALUE obj);
|
|
|
|
|
|
|
|
static VALUE
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_opt_nil_p(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv)
|
2019-08-02 17:25:38 +03:00
|
|
|
{
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(recv) &&
|
2019-11-19 07:16:04 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_NIL_P, NIL_REDEFINED_OP_FLAG)) {
|
|
|
|
return Qtrue;
|
|
|
|
}
|
2020-01-08 10:14:01 +03:00
|
|
|
else if (vm_method_cfunc_is(iseq, cd, recv, rb_false)) {
|
2019-11-19 07:16:04 +03:00
|
|
|
return Qfalse;
|
2019-09-27 04:20:56 +03:00
|
|
|
}
|
|
|
|
else {
|
2019-11-19 07:16:04 +03:00
|
|
|
return Qundef;
|
2019-08-02 17:25:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-20 07:51:09 +03:00
|
|
|
static VALUE
|
|
|
|
fix_succ(VALUE x)
|
|
|
|
{
|
|
|
|
switch (x) {
|
2018-11-20 08:58:12 +03:00
|
|
|
case ~0UL:
|
|
|
|
/* 0xFFFF_FFFF == INT2FIX(-1)
|
|
|
|
* `-1.succ` is of course 0. */
|
|
|
|
return INT2FIX(0);
|
|
|
|
case RSHIFT(~0UL, 1):
|
|
|
|
/* 0x7FFF_FFFF == LONG2FIX(0x3FFF_FFFF)
|
|
|
|
* 0x3FFF_FFFF + 1 == 0x4000_0000, which is a Bignum. */
|
|
|
|
return rb_uint2big(1UL << (SIZEOF_LONG * CHAR_BIT - 2));
|
|
|
|
default:
|
|
|
|
/* LONG2FIX(FIX2LONG(x)+FIX2LONG(y))
|
|
|
|
* == ((lx*2+1)/2 + (ly*2+1)/2)*2+1
|
|
|
|
* == lx*2 + ly*2 + 1
|
|
|
|
* == (lx*2+1) + (ly*2+1) - 1
|
|
|
|
* == x + y - 1
|
|
|
|
*
|
|
|
|
* Here, if we put y := INT2FIX(1):
|
|
|
|
*
|
|
|
|
* == x + INT2FIX(1) - 1
|
|
|
|
* == x + 2 .
|
|
|
|
*/
|
|
|
|
return x + 2;
|
2018-11-20 07:51:09 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible.  That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
static VALUE
|
|
|
|
vm_opt_succ(VALUE recv)
|
|
|
|
{
|
2017-04-19 10:27:03 +03:00
|
|
|
if (FIXNUM_P(recv) &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_SUCC, INTEGER_REDEFINED_OP_FLAG)) {
|
2018-11-20 07:51:09 +03:00
|
|
|
return fix_succ(recv);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2017-04-19 10:27:03 +03:00
|
|
|
else if (SPECIAL_CONST_P(recv)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString &&
|
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_SUCC, STRING_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_str_succ(recv);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
else {
|
2017-04-19 10:27:03 +03:00
|
|
|
return Qundef;
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2020-01-08 10:14:01 +03:00
|
|
|
vm_opt_not(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv)
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
if (vm_method_cfunc_is(iseq, cd, recv, rb_obj_not)) {
|
2022-01-01 09:41:00 +03:00
|
|
|
return RBOOL(!RTEST(recv));
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
vm_opt_regexpmatch2(VALUE recv, VALUE obj)
|
|
|
|
{
|
2019-11-19 07:16:04 +03:00
|
|
|
if (SPECIAL_CONST_P(recv)) {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
else if (RBASIC_CLASS(recv) == rb_cString &&
|
2018-09-19 10:46:44 +03:00
|
|
|
CLASS_OF(obj) == rb_cRegexp &&
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MATCH, STRING_REDEFINED_OP_FLAG)) {
|
2017-09-03 15:35:25 +03:00
|
|
|
return rb_reg_match(obj, recv);
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
}
|
2019-11-19 07:16:04 +03:00
|
|
|
else if (RBASIC_CLASS(recv) == rb_cRegexp &&
|
2018-09-19 04:41:56 +03:00
|
|
|
BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG)) {
|
|
|
|
return rb_reg_match(recv, obj);
|
|
|
|
}
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
else {
|
|
|
|
return Qundef;
|
|
|
|
}
|
|
|
|
}
|
2017-11-14 15:58:36 +03:00
|
|
|
|
|
|
|
/* Prototype for rb_iseq_event_flags(); vm_trace() below calls it with the
 * current iseq and the instruction offset to obtain `pc_events`. */
rb_event_flag_t rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos);
|
|
|
|
|
2020-09-01 05:48:34 +03:00
|
|
|
/* Forward declaration of vm_trace(), wrapped in NOINLINE() (presumably to
 * keep the tracing slow path out of line -- confirm against the macro). */
NOINLINE(static void vm_trace(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp));
|
2017-11-14 15:58:36 +03:00
|
|
|
|
2018-11-26 21:16:39 +03:00
|
|
|
static inline void
|
|
|
|
vm_trace_hook(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE *pc,
|
|
|
|
rb_event_flag_t pc_events, rb_event_flag_t target_event,
|
2022-04-30 01:54:16 +03:00
|
|
|
rb_hook_list_t *global_hooks, rb_hook_list_t *const *local_hooks_ptr, VALUE val)
|
2018-11-26 21:16:39 +03:00
|
|
|
{
|
|
|
|
rb_event_flag_t event = pc_events & target_event;
|
|
|
|
VALUE self = GET_SELF();
|
|
|
|
|
|
|
|
VM_ASSERT(rb_popcount64((uint64_t)event) == 1);
|
|
|
|
|
|
|
|
if (event & global_hooks->events) {
|
|
|
|
/* increment PC because source line is calculated with PC-1 */
|
|
|
|
reg_cfp->pc++;
|
|
|
|
vm_dtrace(event, ec);
|
|
|
|
rb_exec_event_hook_orig(ec, global_hooks, event, self, 0, 0, 0 , val, 0);
|
|
|
|
reg_cfp->pc--;
|
|
|
|
}
|
|
|
|
|
2022-04-30 01:54:16 +03:00
|
|
|
// Load here since global hook above can add and free local hooks
|
|
|
|
rb_hook_list_t *local_hooks = *local_hooks_ptr;
|
2018-11-26 21:16:39 +03:00
|
|
|
if (local_hooks != NULL) {
|
|
|
|
if (event & local_hooks->events) {
|
|
|
|
/* increment PC because source line is calculated with PC-1 */
|
|
|
|
reg_cfp->pc++;
|
|
|
|
rb_exec_event_hook_orig(ec, local_hooks, event, self, 0, 0, 0 , val, 0);
|
|
|
|
reg_cfp->pc--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * VM_TRACE_HOOK(target_event, val)
 *
 * Calls vm_trace_hook() for `target_event` when the current instruction
 * carries it (pc_events) and it is enabled (enabled_flags).
 *
 * The expansion refers to these names, which must be in scope at the point
 * of use: ec, reg_cfp, pc, pc_events, enabled_flags, global_hooks and
 * local_hooks_ptr.
 */
#define VM_TRACE_HOOK(target_event, val) do { \
    if (enabled_flags & (pc_events & (target_event))) { \
        vm_trace_hook(ec, reg_cfp, pc, pc_events, (target_event), global_hooks, local_hooks_ptr, (val)); \
    } \
} while (0)
|
|
|
|
|
2023-08-01 11:25:20 +03:00
|
|
|
static VALUE
|
|
|
|
rescue_errinfo(rb_execution_context_t *ec, rb_control_frame_t *cfp)
|
|
|
|
{
|
|
|
|
VM_ASSERT(VM_FRAME_RUBYFRAME_P(cfp));
|
|
|
|
VM_ASSERT(ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_RESCUE);
|
|
|
|
return cfp->ep[VM_ENV_INDEX_LAST_LVAR];
|
|
|
|
}
|
|
|
|
|
2017-11-14 15:58:36 +03:00
|
|
|
static void
|
2020-09-01 05:48:34 +03:00
|
|
|
vm_trace(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)
|
2017-11-14 15:58:36 +03:00
|
|
|
{
|
2020-09-01 05:48:34 +03:00
|
|
|
const VALUE *pc = reg_cfp->pc;
|
2018-11-26 21:16:39 +03:00
|
|
|
rb_event_flag_t enabled_flags = ruby_vm_event_flags & ISEQ_TRACE_EVENTS;
|
2021-07-14 02:01:09 +03:00
|
|
|
rb_event_flag_t global_events = enabled_flags;
|
2017-11-14 15:58:36 +03:00
|
|
|
|
2018-11-26 21:16:39 +03:00
|
|
|
if (enabled_flags == 0 && ruby_vm_event_local_num == 0) {
|
|
|
|
return;
|
2017-11-17 09:24:55 +03:00
|
|
|
}
|
2017-12-11 22:17:25 +03:00
|
|
|
else {
|
|
|
|
const rb_iseq_t *iseq = reg_cfp->iseq;
|
2022-04-30 01:54:16 +03:00
|
|
|
VALUE iseq_val = (VALUE)iseq;
|
2022-03-23 22:19:48 +03:00
|
|
|
size_t pos = pc - ISEQ_BODY(iseq)->iseq_encoded;
|
2018-11-26 21:16:54 +03:00
|
|
|
rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pos);
|
2018-12-06 13:52:27 +03:00
|
|
|
rb_hook_list_t *local_hooks = iseq->aux.exec.local_hooks;
|
2022-04-30 01:54:16 +03:00
|
|
|
rb_hook_list_t *const *local_hooks_ptr = &iseq->aux.exec.local_hooks;
|
2021-07-14 02:01:09 +03:00
|
|
|
rb_event_flag_t iseq_local_events = local_hooks != NULL ? local_hooks->events : 0;
|
|
|
|
rb_hook_list_t *bmethod_local_hooks = NULL;
|
2022-04-30 01:54:16 +03:00
|
|
|
rb_hook_list_t **bmethod_local_hooks_ptr = NULL;
|
2021-07-14 02:01:09 +03:00
|
|
|
rb_event_flag_t bmethod_local_events = 0;
|
2022-04-30 01:54:16 +03:00
|
|
|
const bool bmethod_frame = VM_FRAME_BMETHOD_P(reg_cfp);
|
2021-07-14 02:01:09 +03:00
|
|
|
enabled_flags |= iseq_local_events;
|
|
|
|
|
|
|
|
VM_ASSERT((iseq_local_events & ~ISEQ_TRACE_EVENTS) == 0);
|
|
|
|
|
|
|
|
if (bmethod_frame) {
|
|
|
|
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(reg_cfp);
|
|
|
|
VM_ASSERT(me->def->type == VM_METHOD_TYPE_BMETHOD);
|
|
|
|
bmethod_local_hooks = me->def->body.bmethod.hooks;
|
2022-04-30 01:54:16 +03:00
|
|
|
bmethod_local_hooks_ptr = &me->def->body.bmethod.hooks;
|
2021-07-14 02:01:09 +03:00
|
|
|
if (bmethod_local_hooks) {
|
|
|
|
bmethod_local_events = bmethod_local_hooks->events;
|
|
|
|
}
|
|
|
|
}
|
2018-11-26 21:16:39 +03:00
|
|
|
|
2017-11-17 09:24:55 +03:00
|
|
|
|
2021-07-14 02:01:09 +03:00
|
|
|
if ((pc_events & enabled_flags) == 0 && !bmethod_frame) {
|
2017-12-11 22:17:25 +03:00
|
|
|
#if 0
|
|
|
|
/* disable trace */
|
2018-11-26 21:16:39 +03:00
|
|
|
/* TODO: incomplete */
|
2017-12-11 22:17:25 +03:00
|
|
|
rb_iseq_trace_set(iseq, vm_event_flags & ISEQ_TRACE_EVENTS);
|
|
|
|
#else
|
|
|
|
/* do not disable trace because of performance problem
|
|
|
|
* (re-enable overhead)
|
|
|
|
*/
|
|
|
|
#endif
|
|
|
|
return;
|
2018-10-20 13:45:55 +03:00
|
|
|
}
|
2018-11-26 21:16:39 +03:00
|
|
|
else if (ec->trace_arg != NULL) {
|
|
|
|
/* already tracing */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
2020-12-19 00:38:58 +03:00
|
|
|
rb_hook_list_t *global_hooks = rb_ec_ractor_hooks(ec);
|
2021-07-14 02:01:09 +03:00
|
|
|
/* Note, not considering iseq local events here since the same
|
|
|
|
* iseq could be used in multiple bmethods. */
|
|
|
|
rb_event_flag_t bmethod_events = global_events | bmethod_local_events;
|
2018-11-26 21:16:39 +03:00
|
|
|
|
|
|
|
if (0) {
|
2021-09-09 17:21:06 +03:00
|
|
|
ruby_debug_printf("vm_trace>>%4d (%4x) - %s:%d %s\n",
|
|
|
|
(int)pos,
|
|
|
|
(int)pc_events,
|
|
|
|
RSTRING_PTR(rb_iseq_path(iseq)),
|
|
|
|
(int)rb_iseq_line_no(iseq, pos),
|
|
|
|
RSTRING_PTR(rb_iseq_label(iseq)));
|
2018-11-26 21:16:39 +03:00
|
|
|
}
|
|
|
|
VM_ASSERT(reg_cfp->pc == pc);
|
|
|
|
VM_ASSERT(pc_events != 0);
|
|
|
|
|
|
|
|
/* check traces */
|
2021-07-14 02:01:09 +03:00
|
|
|
if ((pc_events & RUBY_EVENT_B_CALL) && bmethod_frame && (bmethod_events & RUBY_EVENT_CALL)) {
|
|
|
|
/* b_call instruction running as a method. Fire call event. */
|
2022-04-30 01:54:16 +03:00
|
|
|
vm_trace_hook(ec, reg_cfp, pc, RUBY_EVENT_CALL, RUBY_EVENT_CALL, global_hooks, bmethod_local_hooks_ptr, Qundef);
|
2021-07-14 02:01:09 +03:00
|
|
|
}
|
2018-11-26 21:16:39 +03:00
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_CLASS | RUBY_EVENT_CALL | RUBY_EVENT_B_CALL, Qundef);
|
2023-08-01 11:25:20 +03:00
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_RESCUE, rescue_errinfo(ec, reg_cfp));
|
2018-11-26 21:16:39 +03:00
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_LINE, Qundef);
|
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_COVERAGE_LINE, Qundef);
|
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_COVERAGE_BRANCH, Qundef);
|
|
|
|
VM_TRACE_HOOK(RUBY_EVENT_END | RUBY_EVENT_RETURN | RUBY_EVENT_B_RETURN, TOPN(0));
|
2021-07-14 02:01:09 +03:00
|
|
|
if ((pc_events & RUBY_EVENT_B_RETURN) && bmethod_frame && (bmethod_events & RUBY_EVENT_RETURN)) {
|
|
|
|
/* b_return instruction running as a method. Fire return event. */
|
2022-04-30 01:54:16 +03:00
|
|
|
vm_trace_hook(ec, reg_cfp, pc, RUBY_EVENT_RETURN, RUBY_EVENT_RETURN, global_hooks, bmethod_local_hooks_ptr, TOPN(0));
|
2021-07-14 02:01:09 +03:00
|
|
|
}
|
2022-04-30 01:54:16 +03:00
|
|
|
|
|
|
|
// Pin the iseq since `local_hooks_ptr` points inside the iseq's slot on the GC heap.
|
|
|
|
// We need the pointer to stay valid in case compaction happens in a trace hook.
|
|
|
|
//
|
|
|
|
// Similar treatment is unnecessary for `bmethod_local_hooks_ptr` since
|
|
|
|
// storage for `rb_method_definition_t` is not on the GC heap.
|
|
|
|
RB_GC_GUARD(iseq_val);
|
2018-11-26 21:16:39 +03:00
|
|
|
}
|
2017-11-14 15:58:36 +03:00
|
|
|
}
|
|
|
|
}
|
2021-07-14 02:01:09 +03:00
|
|
|
#undef VM_TRACE_HOOK
|
2018-09-11 12:48:58 +03:00
|
|
|
|
|
|
|
#if VM_CHECK_MODE > 0
|
2020-06-21 20:27:04 +03:00
|
|
|
/* Declaration of the stack-canary failure handler defined further down,
 * carrying the NORETURN, NOINLINE and COLDFUNC annotations. */
NORETURN(NOINLINE(COLDFUNC
void rb_vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c)));
|
2018-09-11 12:48:58 +03:00
|
|
|
|
|
|
|
void
|
|
|
|
Init_vm_stack_canary(void)
|
|
|
|
{
|
|
|
|
/* This has to be called _after_ our PRNG is properly set up. */
|
2018-09-11 14:05:20 +03:00
|
|
|
int n = ruby_fill_random_bytes(&vm_stack_canary, sizeof vm_stack_canary, false);
|
2020-03-09 20:22:11 +03:00
|
|
|
vm_stack_canary |= 0x01; // valid VALUE (Fixnum)
|
2018-09-11 12:48:58 +03:00
|
|
|
|
2019-02-01 10:26:39 +03:00
|
|
|
vm_stack_canary_was_born = true;
|
2018-09-11 12:48:58 +03:00
|
|
|
VM_ASSERT(n == 0);
|
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
void
|
2020-12-25 17:36:25 +03:00
|
|
|
rb_vm_canary_is_found_dead(enum ruby_vminsn_type i, VALUE c)
|
2018-09-11 12:48:58 +03:00
|
|
|
{
|
|
|
|
/* Because a method has already been called, why not call
|
|
|
|
* another one. */
|
|
|
|
const char *insn = rb_insns_name(i);
|
|
|
|
VALUE inspection = rb_inspect(c);
|
|
|
|
const char *str = StringValueCStr(inspection);
|
|
|
|
|
2018-09-11 14:01:18 +03:00
|
|
|
rb_bug("dead canary found at %s: %s", insn, str);
|
2018-09-11 12:48:58 +03:00
|
|
|
}
|
|
|
|
|
2018-09-11 14:26:15 +03:00
|
|
|
#else
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Variant used when the VM_CHECK_MODE > 0 branch above is not compiled. */
void
Init_vm_stack_canary(void)
{
    /* nothing to do */
}
|
|
|
|
#endif
|
2019-11-07 10:58:00 +03:00
|
|
|
|
|
|
|
|
|
|
|
/* a part of the following code is generated by this ruby script:
|
|
|
|
|
|
|
|
16.times{|i|
|
|
|
|
typedef_args = (0...i).map{|j| "VALUE v#{j+1}"}.join(", ")
|
|
|
|
typedef_args.prepend(", ") if i != 0
|
|
|
|
call_args = (0...i).map{|j| "argv[#{j}]"}.join(", ")
|
|
|
|
call_args.prepend(", ") if i != 0
|
|
|
|
puts %Q{
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker#{i}(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr#{i}_t)(rb_execution_context_t *ec, VALUE self#{typedef_args});
|
|
|
|
return (*(rb_invoke_funcptr#{i}_t)funcptr)(ec, self#{call_args});
|
|
|
|
}}
|
|
|
|
}
|
|
|
|
|
|
|
|
puts
|
|
|
|
puts "static VALUE (* const cfunc_invokers[])(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr) = {"
|
|
|
|
16.times{|i|
|
|
|
|
puts " builtin_invoker#{i},"
|
|
|
|
}
|
|
|
|
puts "};"
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker0(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr0_t)(rb_execution_context_t *ec, VALUE self);
|
|
|
|
return (*(rb_invoke_funcptr0_t)funcptr)(ec, self);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker1(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr1_t)(rb_execution_context_t *ec, VALUE self, VALUE v1);
|
|
|
|
return (*(rb_invoke_funcptr1_t)funcptr)(ec, self, argv[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker2(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr2_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2);
|
|
|
|
return (*(rb_invoke_funcptr2_t)funcptr)(ec, self, argv[0], argv[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker3(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr3_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3);
|
|
|
|
return (*(rb_invoke_funcptr3_t)funcptr)(ec, self, argv[0], argv[1], argv[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker4(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr4_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4);
|
|
|
|
return (*(rb_invoke_funcptr4_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker5(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr5_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5);
|
|
|
|
return (*(rb_invoke_funcptr5_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker6(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr6_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6);
|
|
|
|
return (*(rb_invoke_funcptr6_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker7(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr7_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7);
|
|
|
|
return (*(rb_invoke_funcptr7_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker8(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr8_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8);
|
|
|
|
return (*(rb_invoke_funcptr8_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker9(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr9_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9);
|
|
|
|
return (*(rb_invoke_funcptr9_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker10(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr10_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10);
|
|
|
|
return (*(rb_invoke_funcptr10_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker11(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr11_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10, VALUE v11);
|
|
|
|
return (*(rb_invoke_funcptr11_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker12(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr12_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10, VALUE v11, VALUE v12);
|
|
|
|
return (*(rb_invoke_funcptr12_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker13(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr13_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10, VALUE v11, VALUE v12, VALUE v13);
|
|
|
|
return (*(rb_invoke_funcptr13_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker14(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr14_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10, VALUE v11, VALUE v12, VALUE v13, VALUE v14);
|
|
|
|
return (*(rb_invoke_funcptr14_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
builtin_invoker15(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr)
|
|
|
|
{
|
|
|
|
typedef VALUE (*rb_invoke_funcptr15_t)(rb_execution_context_t *ec, VALUE self, VALUE v1, VALUE v2, VALUE v3, VALUE v4, VALUE v5, VALUE v6, VALUE v7, VALUE v8, VALUE v9, VALUE v10, VALUE v11, VALUE v12, VALUE v13, VALUE v14, VALUE v15);
|
|
|
|
return (*(rb_invoke_funcptr15_t)funcptr)(ec, self, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13], argv[14]);
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef VALUE (*builtin_invoker)(rb_execution_context_t *ec, VALUE self, const VALUE *argv, rb_insn_func_t funcptr);
|
|
|
|
|
|
|
|
static builtin_invoker
|
|
|
|
lookup_builtin_invoker(int argc)
|
|
|
|
{
|
|
|
|
static const builtin_invoker invokers[] = {
|
|
|
|
builtin_invoker0,
|
|
|
|
builtin_invoker1,
|
|
|
|
builtin_invoker2,
|
|
|
|
builtin_invoker3,
|
|
|
|
builtin_invoker4,
|
|
|
|
builtin_invoker5,
|
|
|
|
builtin_invoker6,
|
|
|
|
builtin_invoker7,
|
|
|
|
builtin_invoker8,
|
|
|
|
builtin_invoker9,
|
|
|
|
builtin_invoker10,
|
|
|
|
builtin_invoker11,
|
|
|
|
builtin_invoker12,
|
|
|
|
builtin_invoker13,
|
|
|
|
builtin_invoker14,
|
|
|
|
builtin_invoker15,
|
|
|
|
};
|
|
|
|
|
|
|
|
return invokers[argc];
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline VALUE
|
2020-06-21 20:27:04 +03:00
|
|
|
invoke_bf(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const struct rb_builtin_function* bf, const VALUE *argv)
|
2019-11-07 10:58:00 +03:00
|
|
|
{
|
2023-03-12 01:25:11 +03:00
|
|
|
const bool canary_p = ISEQ_BODY(reg_cfp->iseq)->builtin_attrs & BUILTIN_ATTR_LEAF; // Verify an assumption of `Primitive.attr! :leaf`
|
2020-06-21 20:27:04 +03:00
|
|
|
SETUP_CANARY(canary_p);
|
2024-10-10 05:29:57 +03:00
|
|
|
rb_insn_func_t func_ptr = (rb_insn_func_t)(uintptr_t)bf->func_ptr;
|
|
|
|
VALUE ret = (*lookup_builtin_invoker(bf->argc))(ec, reg_cfp->self, argv, func_ptr);
|
2020-06-21 20:27:04 +03:00
|
|
|
CHECK_CANARY(canary_p, BIN(invokebuiltin));
|
|
|
|
return ret;
|
2019-11-07 10:58:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-11-09 10:15:17 +03:00
|
|
|
vm_invoke_builtin(rb_execution_context_t *ec, rb_control_frame_t *cfp, const struct rb_builtin_function* bf, const VALUE *argv)
|
2019-11-07 10:58:00 +03:00
|
|
|
{
|
|
|
|
return invoke_bf(ec, cfp, bf, argv);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-11-18 04:36:48 +03:00
|
|
|
vm_invoke_builtin_delegate(rb_execution_context_t *ec, rb_control_frame_t *cfp, const struct rb_builtin_function *bf, unsigned int start_index)
|
2019-11-07 10:58:00 +03:00
|
|
|
{
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
if (0) { // debug print
|
2021-09-09 17:21:06 +03:00
|
|
|
fputs("vm_invoke_builtin_delegate: passing -> ", stderr);
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
for (int i=0; i<bf->argc; i++) {
|
2022-03-23 22:19:48 +03:00
|
|
|
ruby_debug_printf(":%s ", rb_id2name(ISEQ_BODY(cfp->iseq)->local_table[i+start_index]));
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
}
|
2024-10-08 09:41:28 +03:00
|
|
|
ruby_debug_printf("\n" "%s %s(%d):%p\n", RUBY_FUNCTION_NAME_STRING, bf->name, bf->argc,
|
|
|
|
(void *)(uintptr_t)bf->func_ptr);
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
}
|
|
|
|
|
2019-11-25 08:02:15 +03:00
|
|
|
if (bf->argc == 0) {
|
|
|
|
return invoke_bf(ec, cfp, bf, NULL);
|
|
|
|
}
|
|
|
|
else {
|
2022-03-23 22:19:48 +03:00
|
|
|
const VALUE *argv = cfp->ep - ISEQ_BODY(cfp->iseq)->local_table_size - VM_ENV_DATA_SIZE + 1 + start_index;
|
2019-11-25 08:02:15 +03:00
|
|
|
return invoke_bf(ec, cfp, bf, argv);
|
|
|
|
}
|
2019-11-07 10:58:00 +03:00
|
|
|
}
|
2019-11-13 22:18:41 +03:00
|
|
|
|
|
|
|
// for __builtin_inline!()
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_vm_lvar_exposed(rb_execution_context_t *ec, int index)
|
|
|
|
{
|
|
|
|
const rb_control_frame_t *cfp = ec->cfp;
|
|
|
|
return cfp->ep[index];
|
|
|
|
}
|