2018-12-27 09:12:09 +03:00
|
|
|
/* -*- C -*-
|
2007-01-16 11:52:22 +03:00
|
|
|
insns.def - YARV instruction definitions
|
|
|
|
|
|
|
|
$Author: $
|
|
|
|
created at: 04/01/01 01:17:55 JST
|
|
|
|
|
* blockinlining.c, compile.c, compile.h, debug.c, debug.h,
id.c, insnhelper.h, insns.def, thread.c, thread_pthread.ci,
thread_pthread.h, thread_win32.ci, thread_win32.h, vm.h,
vm_dump.c, vm_evalbody.ci, vm_opts.h: fix comments and
copyright year.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13920 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-11-14 01:13:04 +03:00
|
|
|
Copyright (C) 2004-2007 Koichi Sasada
|
2018-01-12 11:38:08 +03:00
|
|
|
Massive rewrite by @shyouhei in 2017.
|
2018-01-12 11:38:07 +03:00
|
|
|
*/
|
2018-01-10 04:53:24 +03:00
|
|
|
|
2018-01-12 11:38:08 +03:00
|
|
|
/* Some comments about this file's contents:
|
|
|
|
|
|
|
|
- The new format aims to be editable by the C editor of your choice;
|
|
|
|
your mileage might vary of course.
|
|
|
|
|
|
|
|
- Each instruction is in the following format:
|
|
|
|
|
|
|
|
DEFINE_INSN
|
|
|
|
instruction_name
|
|
|
|
(type operand, type operand, ..)
|
|
|
|
(pop_values, ..)
|
|
|
|
(return values ..)
|
|
|
|
// attr type name contents..
|
|
|
|
{
|
|
|
|
.. // insn body
|
|
|
|
}
|
|
|
|
|
|
|
|
- Unlike the old format which was line-oriented, you can now place
|
|
|
|
newlines and comments at liberal positions.
|
|
|
|
|
|
|
|
- `DEFINE_INSN` is a keyword.
|
|
|
|
|
|
|
|
- An instruction name must be a valid C identifier.
|
|
|
|
|
|
|
|
- Operands, pop values, return values are series of either variable
|
|
|
|
declarations, keyword `void`, or keyword `...`. They are much
|
|
|
|
like C function declarations.
|
|
|
|
|
|
|
|
- Attribute pragmas are optional, and can include arbitrary C
|
|
|
|
expressions. You can write anything there but as of writing,
|
2018-01-27 16:50:28 +03:00
|
|
|
supported attributes are:
|
|
|
|
|
|
|
|
* sp_inc: Used to dynamically calculate sp increase in
|
|
|
|
`insn_stack_increase`.
|
|
|
|
|
2018-07-25 17:55:43 +03:00
|
|
|
* handles_sp: If it is true, VM deals with sp in the insn.
|
2018-10-29 06:21:22 +03:00
|
|
|
Defaults to true if the instruction takes an ISEQ operand, false otherwise.
|
2018-07-19 16:25:22 +03:00
|
|
|
|
2018-09-11 12:48:58 +03:00
|
|
|
* leaf: indicates that the instruction is "leaf" i.e. it does
|
2018-10-29 06:21:22 +03:00
|
|
|
not introduce a new stack frame on top of it.
|
|
|
|
If an instruction handles sp, that can never be a leaf.
|
2018-09-11 12:48:58 +03:00
|
|
|
|
2018-01-12 11:38:08 +03:00
|
|
|
- Attributes can access operands, but not stack (push/pop) variables.
|
|
|
|
|
|
|
|
- An instruction's body is a pure C block, copied verbatim into
|
|
|
|
the generated C source code.
|
2018-01-10 04:53:24 +03:00
|
|
|
*/
|
2018-01-09 16:30:29 +03:00
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* nop */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
nop
|
|
|
|
()
|
|
|
|
()
|
|
|
|
()
|
|
|
|
{
|
|
|
|
/* none */
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with variables */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get a local variable (pointed to by `idx' and `level').
|
2012-10-04 17:52:20 +04:00
|
|
|
'level' indicates the nesting depth from the current block.
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
getlocal
|
2012-10-04 17:52:20 +04:00
|
|
|
(lindex_t idx, rb_num_t level)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = *(vm_get_ep(GET_EP(), level) - idx);
|
2017-05-31 09:46:57 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(lvar_get);
|
|
|
|
(void)RB_DEBUG_COUNTER_INC_IF(lvar_get_dynamic, level > 0);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Set a local variable (pointed to by 'idx') as val.
|
2012-10-04 17:52:20 +04:00
|
|
|
'level' indicates the nesting depth from the current block.
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
setlocal
|
2012-10-04 17:52:20 +04:00
|
|
|
(lindex_t idx, rb_num_t level)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
|
|
|
()
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
vm_env_write(vm_get_ep(GET_EP(), level), -(int)idx, val);
|
2017-05-31 09:46:57 +03:00
|
|
|
RB_DEBUG_COUNTER_INC(lvar_set);
|
|
|
|
(void)RB_DEBUG_COUNTER_INC_IF(lvar_set_dynamic, level > 0);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get a block parameter as a value.
   On first access (VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM not yet set on the
   env) the block handler is converted with rb_vm_bh_to_procval(), stored
   in local slot `idx`, and the flag is set.  Later accesses just read the
   slot. */
|
2017-10-24 14:13:49 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
getblockparam
|
|
|
|
(lindex_t idx, rb_num_t level)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
/* Resolve the environment at nesting depth `level`; it must be a local
   env (asserted below). */
const VALUE *ep = vm_get_ep(GET_EP(), level);
|
|
|
|
VM_ASSERT(VM_ENV_LOCAL_P(ep));
|
|
|
|
|
|
|
|
if (!VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM)) {
|
2017-10-27 09:21:50 +03:00
|
|
|
/* First access: turn the block handler into a value and cache it in
   the slot. */
val = rb_vm_bh_to_procval(ec, VM_ENV_BLOCK_HANDLER(ep));
|
2017-10-24 14:13:49 +03:00
|
|
|
vm_env_write(ep, -(int)idx, val);
|
|
|
|
/* From now on the slot is the source of truth; the next access takes
   the else branch. */
VM_ENV_FLAGS_SET(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Slot already populated: read it like an ordinary local. */
val = *(ep - idx);
|
|
|
|
RB_DEBUG_COUNTER_INC(lvar_get);
|
|
|
|
(void)RB_DEBUG_COUNTER_INC_IF(lvar_get_dynamic, level > 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Set block parameter.
   Writes val into local slot `idx` of the env at nesting depth `level`
   and sets VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM on that env. */
|
2017-10-24 14:13:49 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setblockparam
|
|
|
|
(lindex_t idx, rb_num_t level)
|
|
|
|
(VALUE val)
|
|
|
|
()
|
|
|
|
{
|
|
|
|
const VALUE *ep = vm_get_ep(GET_EP(), level);
|
|
|
|
VM_ASSERT(VM_ENV_LOCAL_P(ep));
|
|
|
|
|
|
|
|
/* Store the new value in the slot and count it as a local-variable
   write. */
vm_env_write(ep, -(int)idx, val);
|
|
|
|
RB_DEBUG_COUNTER_INC(lvar_set);
|
|
|
|
(void)RB_DEBUG_COUNTER_INC_IF(lvar_set_dynamic, level > 0);
|
|
|
|
|
|
|
|
/* Record that the slot now holds the value, so readers that test this
   flag use the slot instead of the block handler. */
VM_ENV_FLAGS_SET(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM);
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get special proxy object which only responds to `call` method if the block parameter
|
2018-01-07 22:18:49 +03:00
|
|
|
represents an iseq/ifunc block. Otherwise, same as `getblockparam`.
|
|
|
|
In the iseq/ifunc case the local slot and the MODIFIED_BLOCK_PARAM flag
are left untouched; in every other first-access case the resulting value
is written to the slot and the flag is set.
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
getblockparamproxy
|
|
|
|
(lindex_t idx, rb_num_t level)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
const VALUE *ep = vm_get_ep(GET_EP(), level);
|
|
|
|
VM_ASSERT(VM_ENV_LOCAL_P(ep));
|
|
|
|
|
|
|
|
if (!VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM)) {
|
|
|
|
VALUE block_handler = VM_ENV_BLOCK_HANDLER(ep);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2018-01-07 22:18:49 +03:00
|
|
|
/* A non-zero handler means a block was given; dispatch on its kind. */
if (block_handler) {
|
|
|
|
switch (vm_block_handler_type(block_handler)) {
|
|
|
|
case block_handler_type_iseq:
|
|
|
|
case block_handler_type_ifunc:
|
|
|
|
/* Hand out the shared proxy; `break` skips the `set` label, so
   nothing is written to the slot and the flag stays clear. */
val = rb_block_param_proxy;
|
|
|
|
break;
|
|
|
|
case block_handler_type_symbol:
|
|
|
|
val = rb_sym_to_proc(VM_BH_TO_SYMBOL(block_handler));
|
|
|
|
/* Jumps into the else branch below to store val and set the flag. */
goto INSN_LABEL(set);
|
|
|
|
case block_handler_type_proc:
|
|
|
|
val = VM_BH_TO_PROC(block_handler);
|
|
|
|
goto INSN_LABEL(set);
|
|
|
|
default:
|
|
|
|
VM_UNREACHABLE(getblockparamproxy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* No block handler: the parameter is nil. */
val = Qnil;
|
|
|
|
/* Shared tail, also reached by goto from the symbol/proc cases:
   cache val in the slot and mark the slot as populated. */
INSN_LABEL(set):
|
|
|
|
vm_env_write(ep, -(int)idx, val);
|
|
|
|
VM_ENV_FLAGS_SET(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Slot already populated: read it like an ordinary local. */
val = *(ep - idx);
|
|
|
|
RB_DEBUG_COUNTER_INC(lvar_get);
|
|
|
|
(void)RB_DEBUG_COUNTER_INC_IF(lvar_get_dynamic, level > 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get value of special local variable ($~, $_, ..). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
getspecial
|
2012-12-10 10:11:16 +04:00
|
|
|
(rb_num_t key, rb_num_t type)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
2019-02-01 09:29:02 +03:00
|
|
|
/* `$~ = MatchData.allocate; $&` can raise. */
|
|
|
|
// attr bool leaf = (type == 0) ? true : false;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2017-10-27 09:21:50 +03:00
|
|
|
val = vm_getspecial(ec, GET_LEP(), key, type);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Set value of special local variable ($~, $_, ...) to obj. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setspecial
|
2012-12-10 10:11:16 +04:00
|
|
|
(rb_num_t key)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE obj)
|
|
|
|
()
|
|
|
|
{
|
2017-10-27 09:21:50 +03:00
|
|
|
lep_svar_set(ec, GET_LEP(), key, obj);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get value of instance variable id of self. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
getinstancevariable
|
2019-10-12 03:06:41 +03:00
|
|
|
(ID id, IVC ic)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
2020-12-11 01:40:22 +03:00
|
|
|
/* Ractor crashes when it accesses class/module-level instance variables. */
|
|
|
|
// attr bool leaf = false; /* has IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2020-10-16 09:20:40 +03:00
|
|
|
val = vm_getinstancevariable(GET_ISEQ(), GET_SELF(), id, ic);
|
2007-02-04 22:17:33 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Set value of instance variable id of self to val. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setinstancevariable
|
2019-10-12 03:06:41 +03:00
|
|
|
(ID id, IVC ic)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
|
|
|
()
|
2024-05-30 15:55:32 +03:00
|
|
|
// attr bool leaf = false; /* has rb_check_frozen() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2020-10-16 09:20:40 +03:00
|
|
|
vm_setinstancevariable(GET_ISEQ(), GET_SELF(), id, val, ic);
|
2007-02-04 22:17:33 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Get value of class variable id of klass as val. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
getclassvariable
|
2022-02-02 17:14:59 +03:00
|
|
|
(ID id, ICVARC ic)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* "class variable access from toplevel" warning can be hooked. */
|
|
|
|
// attr bool leaf = false; /* has rb_warning() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2021-06-01 20:34:06 +03:00
|
|
|
rb_control_frame_t *cfp = GET_CFP();
|
2022-02-02 17:14:59 +03:00
|
|
|
val = vm_getclassvariable(GET_ISEQ(), cfp, id, ic);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Set value of class variable id of klass as val. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setclassvariable
|
2022-02-02 17:14:59 +03:00
|
|
|
(ID id, ICVARC ic)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* "class variable access from toplevel" warning can be hooked. */
|
|
|
|
// attr bool leaf = false; /* has rb_warning() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2016-09-08 07:44:51 +03:00
|
|
|
vm_ensure_not_refinement_module(GET_SELF());
|
2022-02-02 17:14:59 +03:00
|
|
|
vm_setclassvariable(GET_ISEQ(), GET_CFP(), id, val, ic);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_getconstant_path
|
|
|
|
(IC ic)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
// attr bool leaf = false; /* may autoload or raise */
|
|
|
|
{
|
2023-10-13 18:52:23 +03:00
|
|
|
val = rb_vm_opt_getconstant_path(ec, GET_CFP(), ic);
|
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of a opt_getinlinecache instruction to
invalidate it when the cache is populated
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each segment
required 32 bytes (on 64-bit platforms) for each constant segment. This
implementation only stores one ID per-segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
2022-08-10 20:35:48 +03:00
|
|
|
}
|
|
|
|
|
2019-08-14 21:19:16 +03:00
|
|
|
/* Get constant variable id. If klass is Qnil and allow_nil is Qtrue, constants
|
2018-07-27 09:28:14 +03:00
|
|
|
are searched in the current scope. Otherwise, get constant under klass
|
2007-01-16 11:52:22 +03:00
|
|
|
class or module.
|
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
getconstant
|
2019-08-14 21:19:16 +03:00
|
|
|
(ID id)
|
|
|
|
(VALUE klass, VALUE allow_nil)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* getconstant can kick autoload */
|
|
|
|
// attr bool leaf = false; /* has rb_autoload_load() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2019-08-14 19:59:27 +03:00
|
|
|
val = vm_get_ev_const(ec, klass, id, allow_nil == Qtrue, 0);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-07-27 09:28:14 +03:00
|
|
|
/* Set constant variable id under cbase class or module.
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
setconstant
|
|
|
|
(ID id)
|
2008-05-14 06:31:28 +04:00
|
|
|
(VALUE val, VALUE cbase)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Assigning an object to a constant is basically a leaf operation.
|
|
|
|
* The problem is, assigning a Module instance to a constant _names_
|
|
|
|
* that module. Naming involves string manipulations, which are
|
|
|
|
* method calls. */
|
|
|
|
// attr bool leaf = false; /* has StringValue() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2008-05-14 06:31:28 +04:00
|
|
|
vm_check_if_namespace(cbase);
|
2016-09-08 07:44:51 +03:00
|
|
|
vm_ensure_not_refinement_module(GET_SELF());
|
2008-05-14 06:31:28 +04:00
|
|
|
rb_const_set(cbase, id, val);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* get global variable id. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
getglobal
|
2020-07-03 10:56:44 +03:00
|
|
|
(ID gid)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
2020-07-03 10:56:44 +03:00
|
|
|
// attr bool leaf = false;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2020-07-03 10:56:44 +03:00
|
|
|
val = rb_gvar_get(gid);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* set global variable id as val. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setglobal
|
2020-07-03 10:56:44 +03:00
|
|
|
(ID gid)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
|
|
|
()
|
2020-07-03 10:56:44 +03:00
|
|
|
// attr bool leaf = false;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2020-07-03 10:56:44 +03:00
|
|
|
rb_gvar_set(gid, val);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with values */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put nil to stack. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
putnil
|
|
|
|
()
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
val = Qnil;
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put self. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
putself
|
|
|
|
()
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
val = GET_SELF();
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put some object.
|
2008-07-01 07:05:58 +04:00
|
|
|
e.g. Fixnum, true, false, nil, and so on.
|
2008-05-14 06:31:28 +04:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
2008-07-01 07:05:58 +04:00
|
|
|
putobject
|
|
|
|
(VALUE val)
|
2008-05-14 06:31:28 +04:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2008-07-01 07:05:58 +04:00
|
|
|
/* */
|
2008-05-14 06:31:28 +04:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put special object. "value_type" is for expansion. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
2008-07-01 07:05:58 +04:00
|
|
|
putspecialobject
|
|
|
|
(rb_num_t value_type)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
2021-12-03 02:53:39 +03:00
|
|
|
// attr bool leaf = (value_type == VM_SPECIAL_OBJECT_VMCORE); /* others may raise when allocating singleton */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
enum vm_special_object_type type;
|
|
|
|
|
|
|
|
type = (enum vm_special_object_type)value_type;
|
|
|
|
val = vm_get_special_object(GET_EP(), type);
|
2008-07-01 07:05:58 +04:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put string val. string will be copied. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
putstring
|
2007-07-02 16:49:35 +04:00
|
|
|
(VALUE str)
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2023-12-01 13:33:00 +03:00
|
|
|
val = rb_ec_str_resurrect(ec, str, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* put chilled string val. string will be copied but frozen in the future. */
|
|
|
|
DEFINE_INSN
|
|
|
|
putchilledstring
|
|
|
|
(VALUE str)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
val = rb_ec_str_resurrect(ec, str, true);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* concatenate the top num strings on the stack and push the result. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
concatstrings
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t num)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2019-02-01 09:29:02 +03:00
|
|
|
/* This instruction can concat UTF-8 and binary strings, resulting in
|
2020-01-27 10:22:53 +03:00
|
|
|
* Encoding::CompatibilityError. */
|
2019-02-01 09:29:02 +03:00
|
|
|
// attr bool leaf = false; /* has rb_enc_cr_str_buf_cat() */
|
2018-11-07 10:16:50 +03:00
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)num;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = rb_str_concat_literals(num, STACK_ADDR_FROM_TOP(num));
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
/* Convert the result to string if not already a string.
|
|
|
|
This is used as a backup if to_s does not return a string. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
anytostring
|
2007-01-16 11:52:22 +03:00
|
|
|
()
|
2017-09-18 05:27:13 +03:00
|
|
|
(VALUE val, VALUE str)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE val)
|
|
|
|
{
|
2017-09-18 05:27:13 +03:00
|
|
|
val = rb_obj_as_string_result(str, val);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* compile the top cnt stack values into a single Regexp and push it.
|
2016-01-10 05:07:00 +03:00
|
|
|
opt is the option for the Regexp; cnt values are consumed (see sp_inc).
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
toregexp
|
2008-01-29 11:03:51 +03:00
|
|
|
(rb_num_t opt, rb_num_t cnt)
|
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2019-02-01 09:29:02 +03:00
|
|
|
/* This instruction can raise RegexpError, thus can call
|
|
|
|
* RegexpError#initialize */
|
|
|
|
// attr bool leaf = false;
|
2018-11-07 10:16:50 +03:00
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)cnt;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2017-04-20 13:32:08 +03:00
|
|
|
/* Gather the cnt topmost stack values into a temporary array. */
const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt));
|
2009-06-30 11:46:44 +04:00
|
|
|
/* Build the Regexp from the collected pieces with option bits opt. */
val = rb_reg_new_ary(ary, (int)opt);
|
2009-02-11 08:46:17 +03:00
|
|
|
/* Empty the temporary array once the Regexp exists.
   NOTE(review): presumably to drop its references early - confirm. */
rb_ary_clear(ary);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* intern str to Symbol and push it. */
|
2017-09-18 08:16:37 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
intern
|
|
|
|
()
|
|
|
|
(VALUE str)
|
|
|
|
(VALUE sym)
|
|
|
|
{
|
|
|
|
sym = rb_str_intern(str);
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put new array initialized with the top num stack values on the stack
   (net stack change: 1 - num, see sp_inc). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
newarray
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t num)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2018-11-07 10:16:50 +03:00
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)num;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2020-12-06 09:41:21 +03:00
|
|
|
/* the array is built from the num slots starting at STACK_ADDR_FROM_TOP(num) */
val = rb_ec_ary_new_from_values(ec, num, STACK_ADDR_FROM_TOP(num));
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2019-09-02 18:21:30 +03:00
|
|
|
/* put new array initialized with num values on the stack.  There
|
|
|
|
should be at least one element on the stack, and the top element
|
|
|
|
should be a hash.  If that top hash is empty, it is not
|
|
|
|
included in the array (the array then holds the other num-1 values).
|
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
newarraykwsplat
|
|
|
|
(rb_num_t num)
|
|
|
|
(...)
|
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)num;
|
|
|
|
{
|
|
|
|
/* *STACK_ADDR_FROM_TOP(1) is the topmost stack value, i.e. the hash */
if (RHASH_EMPTY_P(*STACK_ADDR_FROM_TOP(1))) {
|
|
|
|
/* empty hash: copy only the first num-1 values, leaving the hash out */
val = rb_ary_new4(num-1, STACK_ADDR_FROM_TOP(num));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* non-empty hash: copy all num values, hash included as the last element */
val = rb_ary_new4(num, STACK_ADDR_FROM_TOP(num));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Add pushtoarraykwsplat instruction to avoid unnecessary array allocation
This is designed to replace the newarraykwsplat instruction, which is
no longer used in the parse.y compiler after this commit. This avoids
an unnecessary array allocation in the case where ARGSCAT is followed
by LIST with keyword:
```ruby
a = []
kw = {}
[*a, 1, **kw]
```
Previous Instructions:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 newhash 0 ( 2)[Li]
0006 setlocal_WC_0 kw@1
0008 getlocal_WC_0 a@0 ( 3)[Li]
0010 splatarray true
0012 putobject_INT2FIX_1_
0013 putspecialobject 1
0015 newhash 0
0017 getlocal_WC_0 kw@1
0019 opt_send_without_block <calldata!mid:core#hash_merge_kwd, argc:2, ARGS_SIMPLE>
0021 newarraykwsplat 2
0023 concattoarray
0024 leave
```
New Instructions:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 newhash 0 ( 2)[Li]
0006 setlocal_WC_0 kw@1
0008 getlocal_WC_0 a@0 ( 3)[Li]
0010 splatarray true
0012 putobject_INT2FIX_1_
0013 pushtoarray 1
0015 putspecialobject 1
0017 newhash 0
0019 getlocal_WC_0 kw@1
0021 opt_send_without_block <calldata!mid:core#hash_merge_kwd, argc:2, ARGS_SIMPLE>
0023 pushtoarraykwsplat
0024 leave
```
pushtoarraykwsplat is designed to be simpler than newarraykwsplat.
It does not take a variable number of arguments from the stack, it
pops the top of the stack, and appends it to the second from the top,
unless the top of the stack is an empty hash.
During this work, I found that ARGSPUSH followed by HASH with keyword
did not compile correctly, as it pushed the generated hash to the
array even if the hash was empty. This fixes the behavior, to use
pushtoarraykwsplat instead of pushtoarray in that case:
```ruby
a = []
kw = {}
[*a, **kw]
[{}] # Before
[] # After
```
This does not remove the newarraykwsplat instruction, as it is still
referenced in the prism compiler (which should be updated similar
to this), YJIT (only in the bindings, it does not appear to be
implemented), and RJIT (in a couple comments). After those are
updated, the newarraykwsplat instruction should be removed.
2024-01-30 21:31:27 +03:00
|
|
|
/* push hash onto array unless the hash is empty (as empty keyword
|
|
|
|
splats should be ignored).  ary is popped and pushed back, so it is
left on the stack whether or not hash was appended.
|
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
pushtoarraykwsplat
|
|
|
|
()
|
|
|
|
(VALUE ary, VALUE hash)
|
|
|
|
(VALUE ary)
|
|
|
|
{
|
|
|
|
if (!RHASH_EMPTY_P(hash)) {
|
|
|
|
/* appends to ary itself; no new array is assigned to the result */
rb_ary_push(ary, hash);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* dup the operand array ary with rb_ary_resurrect() and push the result. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
duparray
|
|
|
|
(VALUE ary)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2019-01-10 02:04:00 +03:00
|
|
|
/* DTrace array-creation probe, given the length of the source array */
RUBY_DTRACE_CREATE_HOOK(ARRAY, RARRAY_LEN(ary));
|
2009-02-18 08:33:36 +03:00
|
|
|
val = rb_ary_resurrect(ary);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
Speed up hash literals by duping
This commit replaces the `newhashfromarray` instruction with a `duphash`
instruction. Instead of allocating a new hash from an array stored in
the Instruction Sequences, store a hash directly in the instruction
sequences and dup it on execution.
== Instruction sequence changes ==
```ruby
code = <<-eorby
{ "foo" => "bar", "baz" => "lol" }
eorby
insns = RubyVM::InstructionSequence.compile(code, __FILE__, nil, 0, frozen_string_literal: true)
puts insns.disasm
```
On Ruby 2.5:
```
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)>====================
0000 putobject "foo"
0002 putobject "bar"
0004 putobject "baz"
0006 putobject "lol"
0008 newhash 4
0010 leave
```
Ruby 2.6@r66174 3b6321083a2e3525da3b34d08a0b68bac094bd7f:
```
$ ./ruby test.rb
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)> (catch: FALSE)
0000 newhashfromarray 2, ["foo", "bar", "baz", "lol"]
0003 leave
```
Ruby 2.6 + This commit:
```
$ ./ruby test.rb
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)> (catch: FALSE)
0000 duphash {"foo"=>"bar", "baz"=>"lol"}
0002 leave
```
== Benchmark Results ==
Compared to 2.5.3:
```
$ make benchmark ITEM=hash_literal_small COMPARE_RUBY=/Users/aaron/.rbenv/versions/2.5.3/bin/ruby
generating known_errors.inc
known_errors.inc unchanged
./revision.h unchanged
/Users/aaron/.rbenv/shims/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::/Users/aaron/.rbenv/versions/2.5.3/bin/ruby -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby -I./lib -I. -I.ext/common -r./prelude --disable-gem" \
$(find ./benchmark -maxdepth 1 -name '*hash_literal_small*.yml' -o -name '*hash_literal_small*.rb' | sort)
Calculating -------------------------------------
compare-ruby built-ruby
hash_literal_small2 1.498 1.877 i/s - 1.000 times in 0.667581s 0.532656s
hash_literal_small4 1.197 1.642 i/s - 1.000 times in 0.835375s 0.609160s
hash_literal_small8 0.620 1.215 i/s - 1.000 times in 1.611638s 0.823090s
Comparison:
hash_literal_small2
built-ruby: 1.9 i/s
compare-ruby: 1.5 i/s - 1.25x slower
hash_literal_small4
built-ruby: 1.6 i/s
compare-ruby: 1.2 i/s - 1.37x slower
hash_literal_small8
built-ruby: 1.2 i/s
compare-ruby: 0.6 i/s - 1.96x slower
```
Compared to r66255
```
$ make benchmark ITEM=hash_literal_small COMPARE_RUBY=/Users/aaron/.rbenv/versions/ruby-trunk/bin/ruby
generating known_errors.inc
known_errors.inc unchanged
./revision.h unchanged
/Users/aaron/.rbenv/shims/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::/Users/aaron/.rbenv/versions/ruby-trunk/bin/ruby -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby -I./lib -I. -I.ext/common -r./prelude --disable-gem" \
$(find ./benchmark -maxdepth 1 -name '*hash_literal_small*.yml' -o -name '*hash_literal_small*.rb' | sort)
Calculating -------------------------------------
compare-ruby built-ruby
hash_literal_small2 1.567 1.831 i/s - 1.000 times in 0.638056s 0.546039s
hash_literal_small4 1.298 1.652 i/s - 1.000 times in 0.770214s 0.605182s
hash_literal_small8 0.873 1.216 i/s - 1.000 times in 1.145304s 0.822047s
Comparison:
hash_literal_small2
built-ruby: 1.8 i/s
compare-ruby: 1.6 i/s - 1.17x slower
hash_literal_small4
built-ruby: 1.7 i/s
compare-ruby: 1.3 i/s - 1.27x slower
hash_literal_small8
built-ruby: 1.2 i/s
compare-ruby: 0.9 i/s - 1.39x slower
```
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66258 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-12-06 21:28:21 +03:00
|
|
|
/* dup the operand hash with rb_hash_resurrect() and push the result. */
|
|
|
|
DEFINE_INSN
|
|
|
|
duphash
|
|
|
|
(VALUE hash)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2019-01-10 02:04:00 +03:00
|
|
|
/* the probe receives twice the entry count (RHASH_SIZE(hash) << 1);
   NOTE(review): presumably so keys and values are counted separately, as
   newhash reports its num operand -- confirm */
RUBY_DTRACE_CREATE_HOOK(HASH, RHASH_SIZE(hash) << 1);
|
2018-12-20 10:17:55 +03:00
|
|
|
val = rb_hash_resurrect(hash);
|
Speed up hash literals by duping
This commit replaces the `newhashfromarray` instruction with a `duphash`
instruction. Instead of allocating a new hash from an array stored in
the Instruction Sequences, store a hash directly in the instruction
sequences and dup it on execution.
== Instruction sequence changes ==
```ruby
code = <<-eorby
{ "foo" => "bar", "baz" => "lol" }
eorby
insns = RubyVM::InstructionSequence.compile(code, __FILE__, nil, 0, frozen_string_literal: true)
puts insns.disasm
```
On Ruby 2.5:
```
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)>====================
0000 putobject "foo"
0002 putobject "bar"
0004 putobject "baz"
0006 putobject "lol"
0008 newhash 4
0010 leave
```
Ruby 2.6@r66174 3b6321083a2e3525da3b34d08a0b68bac094bd7f:
```
$ ./ruby test.rb
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)> (catch: FALSE)
0000 newhashfromarray 2, ["foo", "bar", "baz", "lol"]
0003 leave
```
Ruby 2.6 + This commit:
```
$ ./ruby test.rb
== disasm: #<ISeq:<compiled>@test.rb:0 (0,0)-(0,36)> (catch: FALSE)
0000 duphash {"foo"=>"bar", "baz"=>"lol"}
0002 leave
```
== Benchmark Results ==
Compared to 2.5.3:
```
$ make benchmark ITEM=hash_literal_small COMPARE_RUBY=/Users/aaron/.rbenv/versions/2.5.3/bin/ruby
generating known_errors.inc
known_errors.inc unchanged
./revision.h unchanged
/Users/aaron/.rbenv/shims/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::/Users/aaron/.rbenv/versions/2.5.3/bin/ruby -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby -I./lib -I. -I.ext/common -r./prelude --disable-gem" \
$(find ./benchmark -maxdepth 1 -name '*hash_literal_small*.yml' -o -name '*hash_literal_small*.rb' | sort)
Calculating -------------------------------------
compare-ruby built-ruby
hash_literal_small2 1.498 1.877 i/s - 1.000 times in 0.667581s 0.532656s
hash_literal_small4 1.197 1.642 i/s - 1.000 times in 0.835375s 0.609160s
hash_literal_small8 0.620 1.215 i/s - 1.000 times in 1.611638s 0.823090s
Comparison:
hash_literal_small2
built-ruby: 1.9 i/s
compare-ruby: 1.5 i/s - 1.25x slower
hash_literal_small4
built-ruby: 1.6 i/s
compare-ruby: 1.2 i/s - 1.37x slower
hash_literal_small8
built-ruby: 1.2 i/s
compare-ruby: 0.6 i/s - 1.96x slower
```
Compared to r66255
```
$ make benchmark ITEM=hash_literal_small COMPARE_RUBY=/Users/aaron/.rbenv/versions/ruby-trunk/bin/ruby
generating known_errors.inc
known_errors.inc unchanged
./revision.h unchanged
/Users/aaron/.rbenv/shims/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::/Users/aaron/.rbenv/versions/ruby-trunk/bin/ruby -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby -I./lib -I. -I.ext/common -r./prelude --disable-gem" \
$(find ./benchmark -maxdepth 1 -name '*hash_literal_small*.yml' -o -name '*hash_literal_small*.rb' | sort)
Calculating -------------------------------------
compare-ruby built-ruby
hash_literal_small2 1.567 1.831 i/s - 1.000 times in 0.638056s 0.546039s
hash_literal_small4 1.298 1.652 i/s - 1.000 times in 0.770214s 0.605182s
hash_literal_small8 0.873 1.216 i/s - 1.000 times in 1.145304s 0.822047s
Comparison:
hash_literal_small2
built-ruby: 1.8 i/s
compare-ruby: 1.6 i/s - 1.17x slower
hash_literal_small4
built-ruby: 1.7 i/s
compare-ruby: 1.3 i/s - 1.27x slower
hash_literal_small8
built-ruby: 1.2 i/s
compare-ruby: 0.9 i/s - 1.39x slower
```
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66258 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-12-06 21:28:21 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* if TOS is an array, expand it to num objects.
|
2016-01-10 05:07:00 +03:00
|
|
|
if the array has fewer than num elements, push nils to fill.
|
|
|
|
if it has more than num elements, the exceeding elements are dropped.
|
|
|
|
unless TOS is an array, push num - 1 nils.
|
|
|
|
if flag is non-zero, push the array of the rest elements
(sp_inc reserves that extra slot only when flag & 0x01).
|
|
|
|
flag: 0x01 - rest args array
|
|
|
|
flag: 0x02 - for postarg
|
|
|
|
flag: 0x04 - reverse?
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
expandarray
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t num, rb_num_t flag)
|
2007-01-16 11:52:22 +03:00
|
|
|
(..., VALUE ary)
|
2018-01-12 11:38:07 +03:00
|
|
|
(...)
|
2023-11-30 18:51:51 +03:00
|
|
|
// attr bool handles_sp = true;
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = false; /* has rb_check_array_type() */
|
2018-11-07 11:03:10 +03:00
|
|
|
// attr rb_snum_t sp_inc = (rb_snum_t)num - 1 + (flag & 1 ? 1 : 0);
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2023-11-30 18:51:51 +03:00
|
|
|
/* all of the work is delegated to vm_expandarray(), which is handed the
   current control frame; NOTE(review): with handles_sp = true the helper
   presumably writes the results to the stack itself -- confirm */
vm_expandarray(GET_CFP(), ary, num, (int)flag);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
/* concat two arrays, without modifying the first array.
|
|
|
|
 * attempts to convert both objects to arrays using to_a.
 * pops ary1 and ary2 and pushes the value returned by vm_concat_array().
|
|
|
|
 */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
concatarray
|
|
|
|
()
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
(VALUE ary1, VALUE ary2)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE ary)
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = false; /* has rb_check_array_type() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* the conversion and concatenation logic lives in vm_concat_array() */
ary = vm_concat_array(ary1, ary2);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
Add concattoarray VM instruction
This instruction is similar to concatarray, but assumes the first
object is already an array, and appends to it directly. This is
different than concatarray, which will create a new array instead
of appending to an existing array.
Additionally, for both concatarray and concattoarray, if the second
argument cannot be converted to an array, then just push it onto
the array, instead of creating a new array to wrap it, and then
using concat array. This saves an array allocation in that case.
This allows `f(*a, *a, *1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, *a, *1)` would generate
4 arrays:
* a dupped by splatarray true
* a dupped again by first concatarray
* 1 wrapped in array by third splatarray
* result of [*a, *a] dupped by second concatarray
Instructions Before for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 splatarray false
0013 concatarray
0014 putobject_INT2FIX_1_
0015 splatarray false
0017 concatarray
0018 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0020 leave
```
Instructions After for `a = []; f(*a, *a, *1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 getlocal_WC_0 a@0
0011 concattoarray
0012 putobject_INT2FIX_1_
0013 concattoarray
0014 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0016 leave
```
2023-11-25 07:23:58 +03:00
|
|
|
/* concat the second array to the first array.
|
|
|
|
 * the first argument must already be an array.
|
|
|
|
 * attempts to convert the second object to an array using to_a.
 * pops ary1 and ary2 and pushes the value returned by vm_concat_to_array().
|
|
|
|
 */
|
|
|
|
DEFINE_INSN
|
|
|
|
concattoarray
|
|
|
|
()
|
|
|
|
(VALUE ary1, VALUE ary2)
|
|
|
|
(VALUE ary)
|
|
|
|
// attr bool leaf = false; /* has rb_check_array_type() */
|
|
|
|
{
|
|
|
|
/* the append logic lives in vm_concat_to_array() */
ary = vm_concat_to_array(ary1, ary2);
|
|
|
|
}
|
|
|
|
|
Add pushtoarray VM instruction
This instruction is similar to concattoarray, but it takes the
number of arguments to push to the array, removes that number
of arguments from the stack, and adds them to the array now at
the top of the stack.
This allows `f(*a, 1)` to allocate only a single array on the
caller side (which can be reused on the callee side in the case of
`def f(*a)`). Prior to this commit, `f(*a, 1)` would generate
3 arrays:
* a dupped by splatarray true
* 1 wrapped in array by newarray
* a dupped again by concatarray
Instructions Before for `a = []; f(*a, 1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 putobject_INT2FIX_1_
0010 newarray 1
0012 concatarray
0013 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|FCALL>
0015 leave
```
Instructions After for `a = []; f(*a, 1)`:
```
0000 newarray 0 ( 1)[Li]
0002 setlocal_WC_0 a@0
0004 putself
0005 getlocal_WC_0 a@0
0007 splatarray true
0009 putobject_INT2FIX_1_
0010 pushtoarray 1
0012 opt_send_without_block <calldata!mid:f, argc:1, ARGS_SPLAT|ARGS_SPLAT_MUT|FCALL>
0014 leave
```
With these changes, method calls to Ruby methods should
implicitly allocate at most one array.
Ignore typeprof bundled gem failure due to unrecognized instruction.
2023-11-28 23:14:45 +03:00
|
|
|
/* append the top num stack values to the array that sits directly below
   them on the stack; the num values are removed (sp_inc = -num). */
|
|
|
|
DEFINE_INSN
|
|
|
|
pushtoarray
|
|
|
|
(rb_num_t num)
|
|
|
|
(...)
|
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = -(rb_snum_t)num;
|
|
|
|
{
|
|
|
|
/* objp addresses the first of the num values to append */
const VALUE *objp = STACK_ADDR_FROM_TOP(num);
|
|
|
|
/* the slot just below them (objp-1) holds the target array */
val = rb_ary_cat(*(objp-1), objp, num);
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* call to_a on ary to splat it: pops ary, pushes vm_splat_array(flag, ary).
   NOTE(review): flag presumably selects whether the result is a fresh copy
   (disassembly shows `splatarray true` / `splatarray false`) -- confirm in
   vm_splat_array(). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
splatarray
|
|
|
|
(VALUE flag)
|
|
|
|
(VALUE ary)
|
|
|
|
(VALUE obj)
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = false; /* has rb_check_array_type() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
obj = vm_splat_array(flag, ary);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2023-11-09 02:56:53 +03:00
|
|
|
/* call to_hash on hash to keyword splat before converting block.
   hash sits just below block on the stack; block is popped and pushed
   back unchanged, only the hash slot is replaced by obj. */
|
|
|
|
DEFINE_INSN
|
|
|
|
splatkw
|
|
|
|
()
|
|
|
|
(VALUE hash, VALUE block)
|
|
|
|
(VALUE obj, VALUE block)
|
|
|
|
// attr bool leaf = false; /* has rb_to_hash_type() */
|
|
|
|
{
|
2024-02-09 03:10:51 +03:00
|
|
|
/* nil is passed through as-is instead of being converted */
if (NIL_P(hash)) {
|
|
|
|
obj = Qnil;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
obj = rb_to_hash_type(hash);
|
|
|
|
}
|
2023-11-09 02:56:53 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put new Hash built from the top num stack values.  num must be an even
   number (the hash is sized for num / 2 entries). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
newhash
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t num)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = false; /* has rb_hash_key_str() */
|
2018-11-07 10:16:50 +03:00
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)num;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2015-10-29 08:32:57 +03:00
|
|
|
/* DTrace hash-creation probe, given the raw stack value count num */
RUBY_DTRACE_CREATE_HOOK(HASH, num);
|
* probes.d: add DTrace probe declarations. [ruby-core:27448]
* array.c (empty_ary_alloc, ary_new): added array create DTrace probe.
* compile.c (rb_insns_name): allowing DTrace probes to access
instruction sequence name.
* Makefile.in: translate probes.d file to appropriate header file.
* common.mk: declare dependencies on the DTrace header.
* configure.in: add a test for existence of DTrace.
* eval.c (setup_exception): add a probe for when an exception is
raised.
* gc.c: Add DTrace probes for mark begin and end, and sweep begin and
end.
* hash.c (empty_hash_alloc): Add a probe for hash allocation.
* insns.def: Add probes for function entry and return.
* internal.h: function declaration for compile.c change.
* load.c (rb_f_load): add probes for `load` entry and exit, require
entry and exit, and wrapping search_required for load path search.
* object.c (rb_obj_alloc): added a probe for general object creation.
* parse.y (yycompile0): added a probe around parse and compile phase.
* string.c (empty_str_alloc, str_new): DTrace probes for string
allocation.
* test/dtrace/*: tests for DTrace probes.
* vm.c (vm_invoke_proc): add probes for function return on exception
raise, hash create, and instruction sequence execution.
* vm_core.h: add probe declarations for function entry and exit.
* vm_dump.c: add probes header file.
* vm_eval.c (vm_call0_cfunc, vm_call0_cfunc_with_frame): add probe on
function entry and return.
* vm_exec.c: expose instruction number to instruction name function.
* vm_insnshelper.c: add function entry and exit probes for cfunc
methods.
* vm_insnhelper.h: vm usage information is always collected, so
uncomment the functions.
12 19:14:50 2012 Akinori MUSHA <knu@iDaemons.org>
* configure.in (isinf, isnan): isinf() and isnan() are macros on
DragonFly which cannot be found by AC_REPLACE_FUNCS(). This
workaround enforces the fact that they exist on DragonFly.
12 15:59:38 2012 Shugo Maeda <shugo@ruby-lang.org>
* vm_core.h (rb_call_info_t::refinements), compile.c (new_callinfo),
vm_insnhelper.c (vm_search_method): revert r37616 because it's too
slow. [ruby-dev:46477]
* test/ruby/test_refinement.rb (test_inline_method_cache): skip
the test until the bug is fixed efficiently.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37631 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-11-13 01:52:12 +04:00
|
|
|
|
|
2017-04-27 07:21:04 +03:00
|
|
|
if (num) {
|
2021-10-16 21:20:30 +03:00
|
|
|
/* num stack values make num / 2 key/value pairs */
val = rb_hash_new_with_size(num / 2);
|
2017-04-27 07:21:04 +03:00
|
|
|
/* insert all num values in one call, straight from the stack */
rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
2021-10-16 21:20:30 +03:00
|
|
|
else {
|
|
|
|
/* num == 0: plain empty hash, nothing to insert */
val = rb_hash_new();
|
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* put new Range object, i.e. Range.new(low, high, flag).
   low and high are popped from the stack; flag is an operand and is
   passed to rb_range_new() as an int. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
newrange
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t flag)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE low, VALUE high)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* rb_range_new() exercises "bad value for range" check. */
|
|
|
|
// attr bool leaf = false; /* see also: range.c:range_init() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2009-06-30 11:46:44 +04:00
|
|
|
val = rb_range_new(low, high, (int)flag);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with stack operation */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* pop one value from the stack and discard it (nothing is pushed). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
pop
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
()
|
|
|
|
{
|
2011-11-27 12:24:19 +04:00
|
|
|
/* val is deliberately unused; NOTE(review): the cast presumably silences
   an unused-variable warning -- confirm */
(void)val;
|
2007-01-16 11:52:22 +03:00
|
|
|
/* none */
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* duplicate stack top: pop val and push it twice (as val1 and val2). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
dup
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
(VALUE val1, VALUE val2)
|
|
|
|
{
|
|
|
|
/* both pushed slots receive the same VALUE; nothing is copied beyond that */
val1 = val2 = val;
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* duplicate the top n stack elements: the n values are copied, in order,
   to the slots starting at the current stack pointer (sp_inc = n). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
dupn
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t n)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(...)
|
|
|
|
// attr rb_snum_t sp_inc = n;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
/* destination: the current stack pointer, as returned by GET_SP() */
void *dst = GET_SP();
|
|
|
|
/* source: the first of the n topmost values */
void *src = STACK_ADDR_FROM_TOP(n);
|
|
|
|
|
|
|
|
MEMCPY(dst, src, VALUE, n);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* swap top 2 vals: (val, obj) are popped and pushed back as (obj, val). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
swap
|
|
|
|
()
|
|
|
|
(VALUE val, VALUE obj)
|
|
|
|
(VALUE obj, VALUE val)
|
|
|
|
{
|
|
|
|
/* none: the exchange is expressed entirely by the differing pop and push
   orders declared above */
|
|
|
|
}
|
|
|
|
|
2022-07-20 22:24:50 +03:00
|
|
|
/* reverse the order of the top n stack values in place (sp_inc = 0). */
|
|
|
|
DEFINE_INSN
|
2022-07-20 22:28:48 +03:00
|
|
|
opt_reverse
|
2022-07-20 22:24:50 +03:00
|
|
|
(rb_num_t n)
|
|
|
|
(...)
|
|
|
|
(...)
|
|
|
|
// attr rb_snum_t sp_inc = 0;
|
|
|
|
{
|
|
|
|
rb_num_t i;
|
|
|
|
/* sp[0] is the deepest of the n values, sp[n-1] the stack top */
VALUE *sp = STACK_ADDR_FROM_TOP(n);
|
|
|
|
|
|
|
|
/* swap the i-th value from the bottom with the i-th from the top; n/2
   iterations leave the middle value alone when n is odd */
for (i=0; i<n/2; i++) {
|
|
|
|
VALUE v0 = sp[i];
|
|
|
|
VALUE v1 = TOPN(i);
|
|
|
|
sp[i] = v1;
|
|
|
|
TOPN(i) = v0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* for stack caching: pops val and pushes it straight back (sp_inc = 0).
   defined conditionally through DEFINE_INSN_IF(STACK_CACHING). */
|
2019-09-02 20:51:48 +03:00
|
|
|
DEFINE_INSN_IF(STACK_CACHING)
|
2007-01-16 11:52:22 +03:00
|
|
|
reput
|
|
|
|
()
|
|
|
|
(..., VALUE val)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = 0;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
|
|
|
/* none */
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* get the nth stack value counted from the stack top and push it; the
   stack grows by one (sp_inc = 1). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
topn
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t n)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = 1;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
|
|
|
val = TOPN(n);
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* set Nth stack entry to stack top: the top value val is written into
   the slot TOPN(n).  val appears in both the pop list and the return
   list and sp_inc is 0, so the stack depth is declared unchanged. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
setn
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t n)
|
2007-01-16 11:52:22 +03:00
|
|
|
(..., VALUE val)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = 0;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2018-07-19 16:25:22 +03:00
|
|
|
TOPN(n) = val;
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* empty current stack: drop n values.  The body is empty; the whole
   effect is the sp_inc attribute, -(rb_snum_t)n.  n is cast to
   rb_snum_t before it is negated (presumably because rb_num_t is
   unsigned -- confirm against its typedef). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
2008-01-25 21:02:01 +03:00
|
|
|
adjuststack
|
|
|
|
(rb_num_t n)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(...)
|
2018-01-12 16:25:03 +03:00
|
|
|
// attr rb_snum_t sp_inc = -(rb_snum_t)n;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2018-07-19 16:25:22 +03:00
|
|
|
/* none: the stack pointer move comes from sp_inc alone */
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with setting */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* defined?: returns pushval when vm_defined() reports true for
   (op_type, obj, v) in the current control frame, and nil otherwise.
   v is the popped stack value; op_type, obj and pushval are
   instruction operands.  Leafness depends on op_type and is computed
   by leafness_of_defined(). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
defined
|
2021-03-17 01:30:47 +03:00
|
|
|
(rb_num_t op_type, VALUE obj, VALUE pushval)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE v)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = leafness_of_defined(op_type);
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2021-03-17 01:25:37 +03:00
|
|
|
val = Qnil;
|
2021-03-17 01:16:51 +03:00
|
|
|
if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
|
2022-11-10 03:11:20 +03:00
|
|
|
val = pushval;
|
2021-03-17 01:12:37 +03:00
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2023-02-28 15:44:57 +03:00
|
|
|
/* defined?(@foo): looks up instance variable `id' on self through
   vm_getivar(), using the inline cache operand ic and passing Qundef
   as the final argument.  Returns pushval unless the lookup result is
   Qundef, in which case nil is returned.  Nothing is popped.
   NOTE(review): the trailing Qundef looks like the "not set" default
   handed to vm_getivar() -- confirm against its definition. */
|
|
|
|
DEFINE_INSN
|
2023-03-09 18:34:38 +03:00
|
|
|
definedivar
|
2023-02-28 15:44:57 +03:00
|
|
|
(ID id, IVC ic, VALUE pushval)
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
// attr bool leaf = false;
|
|
|
|
{
|
|
|
|
val = Qnil;
|
2024-01-30 08:48:59 +03:00
|
|
|
if (!UNDEF_P(vm_getivar(GET_SELF(), id, GET_ISEQ(), ic, NULL, FALSE, Qundef))) {
|
2023-02-28 15:44:57 +03:00
|
|
|
val = pushval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* check whether `target' matches `pattern'; the check itself is delegated to vm_check_match().
|
2012-08-08 11:52:19 +04:00
|
|
|
`flag & VM_CHECKMATCH_TYPE_MASK' describes how to check the pattern.
|
|
|
|
VM_CHECKMATCH_TYPE_WHEN: ignore target and check pattern is truthy.
|
|
|
|
VM_CHECKMATCH_TYPE_CASE: check `pattern === target'.
|
2021-09-24 00:14:04 +03:00
|
|
|
VM_CHECKMATCH_TYPE_RESCUE: check `pattern.kind_of?(Module) && pattern === target'.
|
2012-08-08 11:52:19 +04:00
|
|
|
if `flag & VM_CHECKMATCH_ARRAY' is not 0, then `pattern' is an array of patterns.
|
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
checkmatch
|
|
|
|
(rb_num_t flag)
|
|
|
|
(VALUE target, VALUE pattern)
|
|
|
|
(VALUE result)
|
2018-09-11 12:48:58 +03:00
|
|
|
// attr bool leaf = leafness_of_checkmatch(flag);
|
2012-08-08 11:52:19 +04:00
|
|
|
{
|
2017-11-16 09:10:31 +03:00
|
|
|
result = vm_check_match(ec, target, pattern, flag);
|
2012-08-08 11:52:19 +04:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* check keywords are specified or not. */
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is referred to by the `checkkeyword' instruction described below.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is compiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
/* checkkeyword: pops nothing and returns the result of
   vm_check_keyword(kw_bits_index, keyword_index, GET_EP()).  Both
   operands are lindex_t values; the lookup is made against the
   current environment pointer. */
DEFINE_INSN
|
|
|
|
checkkeyword
|
2017-12-23 03:51:36 +03:00
|
|
|
(lindex_t kw_bits_index, lindex_t keyword_index)
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is refered by `checkkeyword' instruction described bellow.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is cimpiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
()
|
|
|
|
(VALUE ret)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
ret = vm_check_keyword(kw_bits_index, keyword_index, GET_EP());
|
* rewrite method/block parameter fitting logic to optimize
keyword arguments/parameters and a splat argument.
[Feature #10440] (Details are described in this ticket)
Most of complex part is moved to vm_args.c.
Now, ISeq#to_a does not catch up new instruction format.
* vm_core.h: change iseq data structures.
* introduce rb_call_info_kw_arg_t to represent keyword arguments.
* add rb_call_info_t::kw_arg.
* rename rb_iseq_t::arg_post_len to rb_iseq_t::arg_post_num.
* rename rb_iseq_t::arg_keywords to arg_keyword_num.
* rename rb_iseq_t::arg_keyword to rb_iseq_t::arg_keyword_bits.
to represent keyword bitmap parameter index.
This bitmap parameter shows that which keyword parameters are given
or not given (0 for given).
It is refered by `checkkeyword' instruction described bellow.
* rename rb_iseq_t::arg_keyword_check to rb_iseq_t::arg_keyword_rest
to represent keyword rest parameter index.
* add rb_iseq_t::arg_keyword_default_values to represent default
keyword values.
* rename VM_CALL_ARGS_SKIP_SETUP to VM_CALL_ARGS_SIMPLE
to represent
(ci->flag & (SPLAT|BLOCKARG)) &&
ci->blockiseq == NULL &&
ci->kw_arg == NULL.
* vm_insnhelper.c, vm_args.c: rewrite with refactoring.
* rewrite splat argument code.
* rewrite keyword arguments/parameters code.
* merge method and block parameter fitting code into one code base.
* vm.c, vm_eval.c: catch up these changes.
* compile.c (new_callinfo): callinfo requires kw_arg parameter.
* compile.c (compile_array_): check the last argument Hash object or
not. If Hash object and all keys are Symbol literals, they are
compiled to keyword arguments.
* insns.def (checkkeyword): add new instruction.
This instruction check the availability of corresponding keyword.
For example, a method "def foo k1: 'v1'; end" is cimpiled to the
following instructions.
0000 checkkeyword 2, 0 # check k1 is given.
0003 branchif 9 # if given, jump to address #9
0005 putstring "v1"
0007 setlocal_OP__WC__0 3 # k1 = 'v1'
0009 trace 8
0011 putnil
0012 trace 16
0014 leave
* insns.def (opt_send_simple): removed and add new instruction
"opt_send_without_block".
* parse.y (new_args_tail_gen): reorder variables.
Before this patch, a method "def foo(k1: 1, kr1:, k2: 2, **krest, &b)"
has parameter variables "k1, kr1, k2, &b, internal_id, krest",
but this patch reorders to "kr1, k1, k2, internal_id, krest, &b".
(locate a block variable at last)
* parse.y (vtable_pop): added.
This function remove latest `n' variables from vtable.
* iseq.c: catch up iseq data changes.
* proc.c: ditto.
* class.c (keyword_error): export as rb_keyword_error().
* common.mk: depend vm_args.c for vm.o.
* hash.c (rb_hash_has_key): export.
* internal.h: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48239 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2014-11-02 21:02:55 +03:00
|
|
|
}
|
|
|
|
|
2018-04-21 13:52:52 +03:00
|
|
|
/* check if val is type: pops val and returns
   RBOOL(TYPE(val) == (int)type).  The operand arrives as rb_num_t and
   is cast to int for the comparison with TYPE(val). */
|
|
|
|
DEFINE_INSN
|
|
|
|
checktype
|
|
|
|
(rb_num_t type)
|
|
|
|
(VALUE val)
|
|
|
|
(VALUE ret)
|
|
|
|
{
|
2022-01-01 10:57:22 +03:00
|
|
|
ret = RBOOL(TYPE(val) == (int)type);
|
2018-04-21 13:52:52 +03:00
|
|
|
}
|
|
|
|
|
2007-01-16 11:52:22 +03:00
|
|
|
/**********************************************************/
|
|
|
|
/* deal with control flow 1: class/module */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* enter class definition scope. if super is Qfalse, and class
|
2021-04-26 23:59:59 +03:00
|
|
|
"klass" is defined, it's redefined. Otherwise, define "klass" class.
   Steps visible in the body: klass comes from
   vm_find_or_create_class_by_id(id, flags, cbase, super); class_iseq
   goes through rb_iseq_check(); then vm_push_frame() pushes a
   VM_FRAME_MAGIC_CLASS | VM_ENV_FLAG_LOCAL frame for class_iseq,
   passing klass, the current block handler and a cref from
   vm_cref_push(ec, klass, ...).  The body never assigns val and ends
   with RESTORE_REGS(); NEXT_INSN();
   NOTE(review): presumably val is supplied when the pushed class body
   frame leaves -- confirm.
|
2007-01-16 11:52:22 +03:00
|
|
|
*/
|
|
|
|
DEFINE_INSN
|
|
|
|
defineclass
|
2012-12-20 12:13:53 +04:00
|
|
|
(ID id, ISEQ class_iseq, rb_num_t flags)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE cbase, VALUE super)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
VALUE klass = vm_find_or_create_class_by_id(id, flags, cbase, super);
|
2007-08-12 23:09:15 +04:00
|
|
|
|
2015-12-08 16:58:50 +03:00
|
|
|
rb_iseq_check(class_iseq);
|
|
|
|
|
2007-01-16 11:52:22 +03:00
|
|
|
/* enter scope */
|
2017-10-27 09:21:50 +03:00
|
|
|
vm_push_frame(ec, class_iseq, VM_FRAME_MAGIC_CLASS | VM_ENV_FLAG_LOCAL, klass,
|
2016-07-28 14:02:30 +03:00
|
|
|
GET_BLOCK_HANDLER(),
|
2021-12-03 02:53:39 +03:00
|
|
|
(VALUE)vm_cref_push(ec, klass, NULL, FALSE, FALSE),
|
2022-03-23 22:19:48 +03:00
|
|
|
ISEQ_BODY(class_iseq)->iseq_encoded, GET_SP(),
|
|
|
|
ISEQ_BODY(class_iseq)->local_table_size,
|
|
|
|
ISEQ_BODY(class_iseq)->stack_max);
|
mjit_compile.c: use local variables for stack
if catch_except_p is FALSE. If catch_except_p is TRUE, stack values
should be on VM's stack when exception is thrown and the JIT-ed frame
is re-executed by VM's exception handler. If it's FALSE, the JIT-ed
frame won't be re-executed and don't need to keep values on VM's stack.
Using local variables allows us to reduce cfp->sp motion. Moving cfp->sp
is needed only for insns whose handles_frame? is false. So it improves
performance.
_mjit_compile_insn.erb: Prepare `stack_size` variable for GET_SP,
STACK_ADDR_FROM_TOP, TOPN macros. Share pc and sp motion partial view.
Use cancel handler created in mjit_compile.c.
_mjit_compile_send.erb: ditto. Also, when iseq->body->catch_except_p is
TRUE, this stops to call mjit_exec directly. I described the reason in
vm_insnhelper.h's comment for EXEC_EC_CFP.
_mjit_compile_pc_and_sp.erb: Shared logic for moving sp and pc. As you
can see from thsi file, when status->local_stack_p is TRUE and
insn.handles_frame? is false, moving sp is skipped. But if
insn.handles_frame? is true, values should be rolled back to VM's stack.
common.mk: add dependency for the file
_mjit_compile_insn_body.erb: Set sp value before canceling JIT on
DISPATCH_ORIGINAL_INSN. Replace GET_SP, STACK_ADDR_FROM_TOP, TOPN macros
for the case ocal_stack_p is TRUE and insn.handles_frame? is false.
In that case, values are not available on VM's stack and those macros
should be replaced.
mjit_compile.inc.erb: updated comments of macros which are supported by
JIT compiler. All references to `cfp->sp` should be replaced and thus
INC_SP, SET_SV, PUSH are no longer supported for now, because they are
not used now.
vm_exec.h: moved EXEC_EC_CFP definition to vm_insnhelper.h because it's
tighly coupled to CALL_METHOD.
vm_insnhelper.h: Have revised EXEC_EC_CFP definition moved from vm_exec.h.
Now it triggers mjit_exec for VM, and has the guard for catch_except_p
on JIT-ed code. See comments for details. CALL_METHOD delegates
triggering mjit_exec to EXEC_EC_CFP.
insns.def: Stopped using EXEC_EC_CFP for the case we don't want to
trigger mjit_exec. Those insns (defineclass, opt_call_c_function) are
not supported by JIT and it's safe to use RESTORE_REGS(), NEXT_INSN().
expandarray is changed to pass GET_SP() to replace the macro in
_mjit_compile_insn_body.erb.
vm_insnhelper.c: change to take sp for the above reason.
[close https://github.com/ruby/ruby/pull/1828]
This patch resurrects the performance which was attached in
[Feature #14235].
* Benchmark
Optcarrot (with configuration for benchmark_driver.gem)
https://github.com/benchmark-driver/optcarrot
$ benchmark-driver benchmark.yml --verbose 1 --rbenv 'before;before+JIT::before,--jit;after;after+JIT::after,--jit' --repeat-count 10
before: ruby 2.6.0dev (2018-03-04 trunk 62652) [x86_64-linux]
before+JIT: ruby 2.6.0dev (2018-03-04 trunk 62652) +JIT [x86_64-linux]
after: ruby 2.6.0dev (2018-03-04 local-variable.. 62652) [x86_64-linux]
last_commit=mjit_compile.c: use local variables for stack
after+JIT: ruby 2.6.0dev (2018-03-04 local-variable.. 62652) +JIT [x86_64-linux]
last_commit=mjit_compile.c: use local variables for stack
Calculating -------------------------------------
before before+JIT after after+JIT
optcarrot 53.552 59.680 53.697 63.358 fps
Comparison:
optcarrot
after+JIT: 63.4 fps
before+JIT: 59.7 fps - 1.06x slower
after: 53.7 fps - 1.18x slower
before: 53.6 fps - 1.18x slower
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62655 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-03-04 10:04:40 +03:00
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2019-04-05 11:15:11 +03:00
|
|
|
/* definemethod: takes nothing from the stack and pushes nothing; it
   calls vm_define_method() with Qnil as the object argument, the
   method name id, the method body iseq, and FALSE as the last
   argument.
   NOTE(review): Qnil/FALSE presumably select the non-singleton case
   (definesmethod passes an object and TRUE) -- confirm in
   vm_define_method(). */
DEFINE_INSN
|
|
|
|
definemethod
|
|
|
|
(ID id, ISEQ iseq)
|
|
|
|
()
|
|
|
|
()
|
|
|
|
{
|
|
|
|
vm_define_method(ec, Qnil, id, (VALUE)iseq, FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* definesmethod: pops obj and pushes nothing; it calls
   vm_define_method() with obj as the object argument, the method name
   id, the method body iseq, and TRUE as the last argument.
   NOTE(review): obj/TRUE presumably select the singleton-method case
   (definemethod passes Qnil and FALSE) -- confirm in
   vm_define_method(). */
DEFINE_INSN
|
|
|
|
definesmethod
|
|
|
|
(ID id, ISEQ iseq)
|
|
|
|
(VALUE obj)
|
|
|
|
()
|
|
|
|
{
|
|
|
|
vm_define_method(ec, obj, id, (VALUE)iseq, TRUE);
|
|
|
|
}
|
|
|
|
|
2007-01-16 11:52:22 +03:00
|
|
|
/**********************************************************/
|
|
|
|
/* deal with control flow 2: method/iterator */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* invoke method.
   vm_caller_setup_arg_block() builds the block handler bh from cd->ci
   and blockiseq; vm_sendish() then performs the call with
   mexp_search_method and its result is passed through JIT_EXEC().
   When val is still Qundef afterwards, the body does RESTORE_REGS()
   and NEXT_INSN() instead of falling through to return val.
   NOTE(review): Qundef presumably means a new frame was pushed and
   execution continues there -- confirm in vm_sendish().
   The stack effect is computed from the call info by
   sp_inc_of_sendish(). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
send
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd, ISEQ blockiseq)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
|
2019-07-31 04:36:05 +03:00
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
|
2024-06-04 00:20:04 +03:00
|
|
|
{
|
|
|
|
VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, false);
|
|
|
|
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_method);
|
|
|
|
JIT_EXEC(ec, val);
|
|
|
|
|
|
|
|
if (UNDEF_P(val)) {
|
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* invoke forward method. */
|
|
|
|
DEFINE_INSN
|
|
|
|
sendforward
|
|
|
|
(CALL_DATA cd, ISEQ blockiseq)
|
|
|
|
(...)
|
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
|
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the callers stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
struct rb_forwarding_call_data adjusted_cd;
|
|
|
|
struct rb_callinfo adjusted_ci;
|
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
VALUE bh = vm_caller_setup_fwd_args(GET_EC(), GET_CFP(), cd, blockiseq, 0, &adjusted_cd, &adjusted_ci);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
val = vm_sendish(ec, GET_CFP(), &adjusted_cd.cd, bh, mexp_search_method);
|
2023-03-16 20:41:12 +03:00
|
|
|
JIT_EXEC(ec, val);
|
2023-03-14 23:39:06 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (cd->cc != adjusted_cd.cd.cc && vm_cc_markable(adjusted_cd.cd.cc)) {
|
|
|
|
RB_OBJ_WRITE(GET_ISEQ(), &cd->cc, adjusted_cd.cd.cc);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-12-26 03:59:37 +03:00
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-09-26 11:11:05 +03:00
|
|
|
/* Invoke a method without a block.
 *
 * Operand cd is the call data of the call site.  The popped values are
 * variadic (`...`); the stack pointer change is computed from cd->ci by
 * sp_inc_of_sendish().  The instruction yields a single value, val. */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_send_without_block
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2018-09-26 11:11:05 +03:00
|
|
|
(...)
|
|
|
|
(VALUE val)
|
|
|
|
// attr bool handles_sp = true;
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
|
2019-07-31 04:36:05 +03:00
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
|
2018-09-26 11:11:05 +03:00
|
|
|
{
|
2018-12-26 03:59:37 +03:00
|
|
|
/* This instruction never passes a block, so the block handler is fixed. */
VALUE bh = VM_BLOCK_HANDLER_NONE;
|
2020-12-17 09:46:36 +03:00
|
|
|
/* Dispatch through the shared send path, using the ordinary method lookup. */
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_method);
|
2023-03-16 20:41:12 +03:00
|
|
|
JIT_EXEC(ec, val);
|
2023-03-14 23:39:06 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
/* An undefined val means no result is available yet: RESTORE_REGS() and
 * NEXT_INSN() are run instead of falling through with val.
 * NOTE(review): presumably the callee pushed its own frame -- confirm
 * against vm_sendish(). */
if (UNDEF_P(val)) {
|
2018-12-26 03:59:37 +03:00
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
|
|
|
}
|
2018-09-26 11:11:05 +03:00
|
|
|
}
|
|
|
|
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
/* Convert object to string using to_s or equivalent.
 *
 * Pops recv and yields val.  vm_objtostring() is tried first; when it
 * returns an undefined value the instruction falls back to
 * CALL_SIMPLE_METHOD() with the call data cd.  Declared as non-leaf. */
|
|
|
|
DEFINE_INSN
|
|
|
|
objtostring
|
|
|
|
(CALL_DATA cd)
|
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
// attr bool leaf = false;
|
|
|
|
{
|
|
|
|
/* Fast path: let the helper produce the string directly. */
val = vm_objtostring(GET_ISEQ(), recv, cd);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
/* Undefined result: the helper declined, so take the generic call path.
 * NOTE(review): presumably this performs a regular `to_s` dispatch --
 * confirm against the CALL_SIMPLE_METHOD() definition. */
if (UNDEF_P(val)) {
|
Optimize dynamic string interpolation for symbol/true/false/nil/0-9
This provides a significant speedup for symbol, true, false,
nil, and 0-9, class/module, and a small speedup in most other cases.
Speedups (using included benchmarks):
:symbol :: 60%
0-9 :: 50%
Class/Module :: 50%
nil/true/false :: 20%
integer :: 10%
[] :: 10%
"" :: 3%
One reason this approach is faster is it reduces the number of
VM instructions for each interpolated value.
Initial idea, approach, and benchmarks from Eric Wong. I applied
the same approach against the master branch, updating it to handle
the significant internal changes since this was first proposed 4
years ago (such as CALL_INFO/CALL_CACHE -> CALL_DATA). I also
expanded it to optimize true/false/nil/0-9/class/module, and added
handling of missing methods, refined methods, and RUBY_DEBUG.
This renames the tostring insn to anytostring, and adds an
objtostring insn that implements the optimization. This requires
making a few functions non-static, and adding some non-static
functions.
This disables 4 YJIT tests. Those tests should be reenabled after
YJIT optimizes the new objtostring insn.
Implements [Feature #13715]
Co-authored-by: Eric Wong <e@80x24.org>
Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Yusuke Endoh <mame@ruby-lang.org>
Co-authored-by: Koichi Sasada <ko1@atdot.net>
2021-11-19 02:10:20 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-11-10 01:17:06 +04:00
|
|
|
/* Optimized `freeze` for the string operand str.
 *
 * Pops nothing and yields val.  vm_opt_str_freeze() is asked for the result
 * with BOP_FREEZE / idFreeze; when it returns an undefined value, a string
 * obtained from rb_str_resurrect(str) is pushed and the call goes through
 * CALL_SIMPLE_METHOD(). */
DEFINE_INSN
|
|
|
|
opt_str_freeze
|
2019-07-31 04:36:05 +03:00
|
|
|
(VALUE str, CALL_DATA cd)
|
2013-11-10 01:17:06 +04:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2018-06-27 04:10:02 +03:00
|
|
|
val = vm_opt_str_freeze(str, BOP_FREEZE, idFreeze);
|
2018-09-12 06:39:36 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
/* NOTE(review): presumably undefined means the optimization does not
 * apply (e.g. the basic operation was redefined) -- confirm in
 * vm_opt_str_freeze().  The pushed string is presumably the receiver of
 * the fallback call. */
if (UNDEF_P(val)) {
|
2018-09-12 07:04:31 +03:00
|
|
|
PUSH(rb_str_resurrect(str));
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2018-09-12 06:39:36 +03:00
|
|
|
}
|
2013-11-10 01:17:06 +04:00
|
|
|
}
|
|
|
|
|
2019-08-02 17:25:38 +03:00
|
|
|
/* optimized nil?
 *
 * Pops recv and yields val.  vm_opt_nil_p() computes the result when it
 * can; an undefined value from it sends the instruction down the
 * CALL_SIMPLE_METHOD() path instead. */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_nil_p
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2019-08-02 17:25:38 +03:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
val = vm_opt_nil_p(GET_ISEQ(), cd, recv);
|
2019-08-02 17:25:38 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
/* Undefined result: the helper declined, so take the generic call path. */
if (UNDEF_P(val)) {
|
2019-08-02 17:25:38 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-27 09:12:37 +03:00
|
|
|
/* Optimized unary minus (`-str`) for the string operand str.
 *
 * Shares vm_opt_str_freeze() with opt_str_freeze, but passes BOP_UMINUS /
 * idUMinus.  When the helper returns an undefined value, a string obtained
 * from rb_str_resurrect(str) is pushed and the call goes through
 * CALL_SIMPLE_METHOD(). */
DEFINE_INSN
|
|
|
|
opt_str_uminus
|
2019-07-31 04:36:05 +03:00
|
|
|
(VALUE str, CALL_DATA cd)
|
2017-03-27 09:12:37 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2018-06-27 04:10:02 +03:00
|
|
|
val = vm_opt_str_freeze(str, BOP_UMINUS, idUMinus);
|
2018-09-12 06:39:36 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
/* NOTE(review): presumably undefined means the optimization does not
 * apply -- confirm in vm_opt_str_freeze().  The pushed string is
 * presumably the receiver of the fallback call. */
if (UNDEF_P(val)) {
|
2018-09-12 07:04:31 +03:00
|
|
|
PUSH(rb_str_resurrect(str));
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2018-09-12 06:39:36 +03:00
|
|
|
}
|
2017-03-27 09:12:37 +03:00
|
|
|
}
|
|
|
|
|
2016-03-17 15:47:31 +03:00
|
|
|
/* Combined "array literal + method call" instruction.
 *
 * Operand num is the number of stack slots consumed and method selects the
 * operation: idHash, idMin, idMax or idPack.  The slots are handed to the
 * matching helper straight from the stack (STACK_ADDR_FROM_TOP(num)) and
 * are replaced by the single result val, hence sp_inc = 1 - num.  Any
 * other method id is treated as a VM bug. */
DEFINE_INSN
|
Emit special instruction for array literal + .(hash|min|max)
This commit introduces a new instruction `opt_newarray_send` which is
used when there is an array literal followed by either the `hash`,
`min`, or `max` method.
```
[a, b, c].hash
```
Will emit an `opt_newarray_send` instruction. This instruction falls
back to a method call if the "interested" method has been monkey
patched.
Here are some examples of the instructions generated:
```
$ ./miniruby --dump=insns -e '[@a, @b].max'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :max
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].min'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :min
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].hash'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :hash
0009 leave
```
[Feature #18897] [ruby-core:109147]
Co-authored-by: John Hawthorn <jhawthorn@github.com>
2022-06-07 03:27:56 +03:00
|
|
|
opt_newarray_send
|
|
|
|
(rb_num_t num, ID method)
|
2016-03-17 15:47:31 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* This instruction typically makes no funcalls.  However, by nature it
|
|
|
|
 * compares the array contents with each other, and that part can call
|
|
|
|
 * methods when necessary.  There is no way to detect such method calls
|
|
|
|
 * beforehand, so we have no choice but to mark it as not leaf. */
|
|
|
|
// attr bool leaf = false; /* has rb_funcall() */
|
2018-11-07 10:16:50 +03:00
|
|
|
// attr rb_snum_t sp_inc = 1 - (rb_snum_t)num;
|
Emit special instruction for array literal + .(hash|min|max)
This commit introduces a new instruction `opt_newarray_send` which is
used when there is an array literal followed by either the `hash`,
`min`, or `max` method.
```
[a, b, c].hash
```
Will emit an `opt_newarray_send` instruction. This instruction falls
back to a method call if the "interested" method has been monkey
patched.
Here are some examples of the instructions generated:
```
$ ./miniruby --dump=insns -e '[@a, @b].max'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :max
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].min'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :min
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].hash'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :hash
0009 leave
```
[Feature #18897] [ruby-core:109147]
Co-authored-by: John Hawthorn <jhawthorn@github.com>
2022-06-07 03:27:56 +03:00
|
|
|
// attr rb_snum_t comptime_sp_inc = 1 - (rb_snum_t)num;
|
|
|
|
{
|
|
|
|
/* Dispatch on the method id baked into the instruction. */
switch(method) {
|
|
|
|
case idHash:
|
|
|
|
val = vm_opt_newarray_hash(ec, num, STACK_ADDR_FROM_TOP(num));
|
|
|
|
break;
|
|
|
|
case idMin:
|
|
|
|
val = vm_opt_newarray_min(ec, num, STACK_ADDR_FROM_TOP(num));
|
|
|
|
break;
|
|
|
|
case idMax:
|
|
|
|
val = vm_opt_newarray_max(ec, num, STACK_ADDR_FROM_TOP(num));
|
|
|
|
break;
|
Introduce a specialize instruction for Array#pack
Instructions for this code:
```ruby
# frozen_string_literal: true
[a].pack("C")
```
Before this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 newarray 1
0005 putobject "C"
0007 opt_send_without_block <calldata!mid:pack, argc:1, ARGS_SIMPLE>
0009 leave
```
After this commit:
```
== disasm: #<ISeq:<main>@test.rb:1 (1,0)-(3,13)>
0000 putself ( 3)[Li]
0001 opt_send_without_block <calldata!mid:a, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0003 putobject "C"
0005 opt_newarray_send 2, :pack
0008 leave
```
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
2024-05-23 21:23:26 +03:00
|
|
|
case idPack:
|
|
|
|
/* For pack, num also counts the extra argument on top of the stack
 * (TOPN(0)), so only num-1 slots are array elements. */
val = rb_vm_opt_newarray_pack(ec, (long)num-1, STACK_ADDR_FROM_TOP(num), TOPN(0));
|
|
|
|
break;
|
Emit special instruction for array literal + .(hash|min|max)
This commit introduces a new instruction `opt_newarray_send` which is
used when there is an array literal followed by either the `hash`,
`min`, or `max` method.
```
[a, b, c].hash
```
Will emit an `opt_newarray_send` instruction. This instruction falls
back to a method call if the "interested" method has been monkey
patched.
Here are some examples of the instructions generated:
```
$ ./miniruby --dump=insns -e '[@a, @b].max'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :max
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].min'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,12)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :min
0009 leave
$ ./miniruby --dump=insns -e '[@a, @b].hash'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 getinstancevariable :@a, <is:0> ( 1)[Li]
0003 getinstancevariable :@b, <is:1>
0006 opt_newarray_send 2, :hash
0009 leave
```
[Feature #18897] [ruby-core:109147]
Co-authored-by: John Hawthorn <jhawthorn@github.com>
2022-06-07 03:27:56 +03:00
|
|
|
default:
|
|
|
|
rb_bug("unreachable");
|
|
|
|
}
|
2016-03-17 15:47:31 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* super(args) # args.size => num
 *
 * Operands are the call data cd and the block iseq blockiseq.  The popped
 * values are variadic (`...`); the stack pointer change is computed from
 * cd->ci by sp_inc_of_sendish().  The instruction yields a single value,
 * val. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
invokesuper
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd, ISEQ blockiseq)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
is temporary removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
|
2019-07-31 04:36:05 +03:00
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
|
2024-06-04 00:20:04 +03:00
|
|
|
{
|
|
|
|
/* Build the block handler for this call site; the trailing `true` is
 * presumably the is_super flag -- confirm against
 * vm_caller_setup_arg_block(). */
VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, true);
|
|
|
|
/* Same send path as a normal call, but with the super method lookup. */
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_super);
|
|
|
|
JIT_EXEC(ec, val);
|
|
|
|
|
|
|
|
/* An undefined val means no result is available yet: RESTORE_REGS() and
 * NEXT_INSN() are run instead of falling through with val.
 * NOTE(review): presumably the callee pushed its own frame -- confirm
 * against vm_sendish(). */
if (UNDEF_P(val)) {
|
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* super(...) # like invokesuper, but the arguments are forwarded from the caller's `...` */
|
|
|
|
DEFINE_INSN
|
|
|
|
invokesuperforward
|
|
|
|
(CALL_DATA cd, ISEQ blockiseq)
|
|
|
|
(...)
|
|
|
|
(VALUE val)
|
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
|
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
struct rb_forwarding_call_data adjusted_cd;
|
|
|
|
struct rb_callinfo adjusted_ci;
|
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
VALUE bh = vm_caller_setup_fwd_args(GET_EC(), GET_CFP(), cd, blockiseq, 1, &adjusted_cd, &adjusted_ci);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
val = vm_sendish(ec, GET_CFP(), &adjusted_cd.cd, bh, mexp_search_super);
|
2023-03-16 20:41:12 +03:00
|
|
|
JIT_EXEC(ec, val);
|
2023-03-14 23:39:06 +03:00
|
|
|
|
2024-06-04 01:48:13 +03:00
|
|
|
if (cd->cc != adjusted_cd.cd.cc && vm_cc_markable(adjusted_cd.cd.cc)) {
|
|
|
|
RB_OBJ_WRITE(GET_ISEQ(), &cd->cc, adjusted_cd.cd.cc);
|
Optimized forwarding callers and callees
This patch optimizes forwarding callers and callees. It only optimizes methods that only take `...` as their parameter, and then pass `...` to other calls.
Calls it optimizes look like this:
```ruby
def bar(a) = a
def foo(...) = bar(...) # optimized
foo(123)
```
```ruby
def bar(a) = a
def foo(...) = bar(1, 2, ...) # optimized
foo(123)
```
```ruby
def bar(*a) = a
def foo(...)
list = [1, 2]
bar(*list, ...) # optimized
end
foo(123)
```
All variants of the above but using `super` are also optimized, including a bare super like this:
```ruby
def foo(...)
super
end
```
This patch eliminates intermediate allocations made when calling methods that accept `...`.
We can observe allocation elimination like this:
```ruby
def m
x = GC.stat(:total_allocated_objects)
yield
GC.stat(:total_allocated_objects) - x
end
def bar(a) = a
def foo(...) = bar(...)
def test
m { foo(123) }
end
test
p test # allocates 1 object on master, but 0 objects with this patch
```
```ruby
def bar(a, b:) = a + b
def foo(...) = bar(...)
def test
m { foo(1, b: 2) }
end
test
p test # allocates 2 objects on master, but 0 objects with this patch
```
How does it work?
-----------------
This patch works by using a dynamic stack size when passing forwarded parameters to callees.
The caller's info object (known as the "CI") contains the stack size of the
parameters, so we pass the CI object itself as a parameter to the callee.
When forwarding parameters, the forwarding ISeq uses the caller's CI to determine how much stack to copy, then copies the caller's stack before calling the callee.
The CI at the forwarded call site is adjusted using information from the caller's CI.
I think this description is kind of confusing, so let's walk through an example with code.
```ruby
def delegatee(a, b) = a + b
def delegator(...)
delegatee(...) # CI2 (FORWARDING)
end
def caller
delegator(1, 2) # CI1 (argc: 2)
end
```
Before we call the delegator method, the stack looks like this:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # |
5| delegatee(...) # CI2 (FORWARDING) |
6| end |
7| |
8| def caller |
-> 9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The ISeq for `delegator` is tagged as "forwardable", so when `caller` calls in
to `delegator`, it writes `CI1` on to the stack as a local variable for the
`delegator` method. The `delegator` method has a special local called `...`
that holds the caller's CI object.
Here is the ISeq disasm for `delegator`:
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
The local called `...` will contain the caller's CI: CI1.
Here is the stack when we enter `delegator`:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
-> 4| # | CI1 (argc: 2)
5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller |
9| delegator(1, 2) # CI1 (argc: 2) |
10| end |
```
The CI at `delegatee` on line 5 is tagged as "FORWARDING", so it knows to
memcopy the caller's stack before calling `delegatee`. In this case, it will
memcopy self, 1, and 2 to the stack before calling `delegatee`. It knows how much
memory to copy from the caller because `CI1` contains stack size information
(argc: 2).
Before executing the `send` instruction, we push `...` on the stack. The
`send` instruction pops `...`, and because it is tagged with `FORWARDING`, it
knows to memcopy (using the information in the CI it just popped):
```
== disasm: #<ISeq:delegator@-e:1 (1,0)-(1,39)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] "..."@0
0000 putself ( 1)[LiCa]
0001 getlocal_WC_0 "..."@0
0003 send <calldata!mid:delegatee, argc:0, FCALL|FORWARDING>, nil
0006 leave [Re]
```
Instruction 0001 puts the caller's CI on the stack. `send` is tagged with
FORWARDING, so it reads the CI and _copies_ the caller's stack to this stack:
```
Executing Line | Code | Stack
---------------+---------------------------------------+--------
1| def delegatee(a, b) = a + b | self
2| | 1
3| def delegator(...) | 2
4| # | CI1 (argc: 2)
-> 5| delegatee(...) # CI2 (FORWARDING) | cref_or_me
6| end | specval
7| | type
8| def caller | self
9| delegator(1, 2) # CI1 (argc: 2) | 1
10| end | 2
```
The "FORWARDING" call site combines information from CI1 with CI2 in order
to support passing other values in addition to the `...` value, as well as
perfectly forward splat args, kwargs, etc.
Since we're able to copy the stack from `caller` in to `delegator`'s stack, we
can avoid allocating objects.
I want to do this to eliminate object allocations for delegate methods.
My long term goal is to implement `Class#new` in Ruby and it uses `...`.
I was able to implement `Class#new` in Ruby
[here](https://github.com/ruby/ruby/pull/9289).
If we adopt the technique in this patch, then we can optimize allocating
objects that take keyword parameters for `initialize`.
For example, this code will allocate 2 objects: one for `SomeObject`, and one
for the kwargs:
```ruby
SomeObject.new(foo: 1)
```
If we combine this technique, plus implement `Class#new` in Ruby, then we can
reduce allocations for this common operation.
Co-Authored-By: John Hawthorn <john@hawthorn.email>
Co-Authored-By: Alan Wu <XrXr@users.noreply.github.com>
2024-04-15 20:48:53 +03:00
|
|
|
}
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-12-26 03:59:37 +03:00
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* yield(args) */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
invokeblock
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(...)
|
2018-01-12 11:38:07 +03:00
|
|
|
(VALUE val)
|
2018-07-25 17:55:43 +03:00
|
|
|
// attr bool handles_sp = true;
|
VALUE size packed callinfo (ci).
Now, rb_call_info contains how to call the method with tuple of
(mid, orig_argc, flags, kwarg). Most of cases, kwarg == NULL and
mid+argc+flags only requires 64bits. So this patch packed
rb_call_info to VALUE (1 word) on such cases. If we can not
represent it in VALUE, then use imemo_callinfo which contains
conventional callinfo (rb_callinfo, renamed from rb_call_info).
iseq->body->ci_kw_size is removed because all of callinfo is VALUE
size (packed ci or a pointer to imemo_callinfo).
To access ci information, we need to use these functions:
vm_ci_mid(ci), _flag(ci), _argc(ci), _kwarg(ci).
struct rb_call_info_kw_arg is renamed to rb_callinfo_kwarg.
rb_funcallv_with_cc() and rb_method_basic_definition_p_with_cc()
are temporarily removed because cd->ci should be marked.
2020-01-08 02:20:36 +03:00
|
|
|
// attr rb_snum_t sp_inc = sp_inc_of_invokeblock(cd->ci);
|
2019-07-31 04:36:05 +03:00
|
|
|
// attr rb_snum_t comptime_sp_inc = sp_inc_of_invokeblock(ci);
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2018-12-26 03:59:37 +03:00
|
|
|
VALUE bh = VM_BLOCK_HANDLER_NONE;
|
2020-12-17 09:46:36 +03:00
|
|
|
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_invokeblock);
|
2023-03-16 20:41:12 +03:00
|
|
|
JIT_EXEC(ec, val);
|
2023-03-14 23:39:06 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-12-26 03:59:37 +03:00
|
|
|
RESTORE_REGS();
|
|
|
|
NEXT_INSN();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* return from this scope. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
leave
|
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* This is super surprising but when leaving from a frame, we check
|
|
|
|
* for interrupts. If any, that should be executed on top of the
|
|
|
|
* current execution context. This is a method call. */
|
|
|
|
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
|
2018-07-25 17:55:43 +03:00
|
|
|
// attr bool handles_sp = true;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
|
|
|
if (OPT_CHECKED_RUN) {
|
2019-03-29 09:36:48 +03:00
|
|
|
const VALUE *const bp = vm_base_ptr(GET_CFP());
|
|
|
|
if (GET_SP() != bp) {
|
|
|
|
vm_stack_consistency_error(ec, GET_CFP(), bp);
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-27 09:21:50 +03:00
|
|
|
if (vm_pop_frame(ec, GET_CFP(), GET_EP())) {
|
2007-06-27 12:21:21 +04:00
|
|
|
#if OPT_CALL_THREADED_CODE
|
2017-10-27 22:16:51 +03:00
|
|
|
rb_ec_thread_ptr(ec)->retval = val;
|
2012-08-07 15:13:57 +04:00
|
|
|
return 0;
|
2007-06-27 12:21:21 +04:00
|
|
|
#else
|
* vm_core.h: remove VM_FRAME_MAGIC_FINISH (finish frame type).
Before this commit:
`finish frame' was place holder which indicates that VM loop
needs to return function.
If a C method calls a Ruby method (a method written in Ruby),
then VM loop will be (re-)invoked. When the Ruby method returns,
then also VM loop should be escaped. `finish frame' has only
one instruction `finish', which returns VM loop function.
VM loop function executes `finish' instruction, then VM loop
function returns itself.
With such mechanism, `leave' instruction (which returns one
frame from current scope) doesn't need to check that this `leave'
should also return from VM loop function.
Strictly, one branch can be removed from `leave' instruction.
Consideration:
However, pushing the `finish frame' needs costs because
it needs several memory accesses. The number of pushing
`finish frame' is greater than I had assumed. Of course,
pushing `finish frame' consumes additional control frame.
Moreover, recent processors has good branch prediction,
with which we can ignore such trivial checking.
After this commit:
Finally, I decide to remove `finish frame' and `finish'
instruction. Some parts of VM depend on `finish frame',
so the new frame flag VM_FRAME_FLAG_FINISH is introduced.
If this frame should escape from VM function loop, then
the result of VM_FRAME_TYPE_FINISH_P(cfp) is true.
`leave' instruction checks this flag every time.
I measured performance on it. However on my environments,
it improves some benchmarks and slows some benchmarks down.
Maybe it is because of C compiler optimization parameters.
I'll re-visit here if this cause problems.
* insns.def (leave, finish): remove finish instruction.
* vm.c, vm_eval.c, vm_exec.c, vm_backtrace.c, vm_dump.c:
apply above changes.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36099 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-06-15 14:22:34 +04:00
|
|
|
return val;
|
2007-06-27 12:21:21 +04:00
|
|
|
#endif
|
* vm_core.h: remove VM_FRAME_MAGIC_FINISH (finish frame type).
Before this commit:
`finish frame' was place holder which indicates that VM loop
needs to return function.
If a C method calls a Ruby method (a method written in Ruby),
then VM loop will be (re-)invoked. When the Ruby method returns,
then also VM loop should be escaped. `finish frame' has only
one instruction `finish', which returns VM loop function.
VM loop function executes `finish' instruction, then VM loop
function returns itself.
With such mechanism, `leave' instruction (which returns one
frame from current scope) doesn't need to check that this `leave'
should also return from VM loop function.
Strictly, one branch can be removed from `leave' instruction.
Consideration:
However, pushing the `finish frame' needs costs because
it needs several memory accesses. The number of pushing
`finish frame' is greater than I had assumed. Of course,
pushing `finish frame' consumes additional control frame.
Moreover, recent processors has good branch prediction,
with which we can ignore such trivial checking.
After this commit:
Finally, I decide to remove `finish frame' and `finish'
instruction. Some parts of VM depend on `finish frame',
so the new frame flag VM_FRAME_FLAG_FINISH is introduced.
If this frame should escape from VM function loop, then
the result of VM_FRAME_TYPE_FINISH_P(cfp) is true.
`leave' instruction checks this flag every time.
I measured performance on it. However on my environments,
it improves some benchmarks and slows some benchmarks down.
Maybe it is because of C compiler optimization parameters.
I'll re-visit here if this cause problems.
* insns.def (leave, finish): remove finish instruction.
* vm.c, vm_eval.c, vm_exec.c, vm_backtrace.c, vm_dump.c:
apply above changes.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36099 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2012-06-15 14:22:34 +04:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
RESTORE_REGS();
|
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with control flow 3: exception */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* longjump: hand throw_state and throwobj to vm_throw() and raise the
 * value it returns via THROW_EXCEPTION(). */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
throw
|
2007-05-03 13:09:14 +04:00
|
|
|
(rb_num_t throw_state)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE throwobj)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as leave: interrupts may be executed here, on top of
 * the current execution context, which amounts to a method call. Hence
 * this instruction is declared non-leaf. */
|
|
|
|
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2017-10-27 09:21:50 +03:00
|
|
|
val = vm_throw(ec, GET_CFP(), throw_state, throwobj);
|
2007-08-06 15:36:30 +04:00
|
|
|
THROW_EXCEPTION(val);
|
2007-01-16 11:52:22 +03:00
|
|
|
/* unreachable */
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************/
|
|
|
|
/* deal with control flow 4: local jump */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* Unconditional jump: set PC to (PC + dst). Pending interrupts are
 * polled with RUBY_VM_CHECK_INTS() before the jump is taken. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
jump
|
|
|
|
(OFFSET dst)
|
|
|
|
()
|
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as leave: the interrupt check may execute interrupts on
 * top of the current execution context, which amounts to a method call.
 * Leafness therefore follows leafness_of_check_ints. */
|
2020-12-17 09:08:04 +03:00
|
|
|
// attr bool leaf = leafness_of_check_ints; /* has rb_threadptr_execute_interrupts() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
2017-11-06 10:44:28 +03:00
|
|
|
RUBY_VM_CHECK_INTS(ec);
|
2007-01-16 11:52:22 +03:00
|
|
|
JUMP(dst);
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* if val is not false or nil (RTEST(val) holds), set PC to (PC + dst).
 * Otherwise the body does nothing. val is popped either way. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
branchif
|
|
|
|
(OFFSET dst)
|
|
|
|
(VALUE val)
|
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as jump: the interrupt check can run interrupts, so
 * leafness follows leafness_of_check_ints. */
|
2020-12-17 09:08:04 +03:00
|
|
|
// attr bool leaf = leafness_of_check_ints; /* has rb_threadptr_execute_interrupts() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
|
|
|
if (RTEST(val)) {
|
2017-11-06 10:44:28 +03:00
|
|
|
/* interrupts are polled only when the branch is actually taken */
RUBY_VM_CHECK_INTS(ec);
|
2007-01-16 11:52:22 +03:00
|
|
|
JUMP(dst);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* if val is false or nil (RTEST(val) does not hold), set PC to (PC + dst).
 * Otherwise the body does nothing. val is popped either way. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
branchunless
|
|
|
|
(OFFSET dst)
|
|
|
|
(VALUE val)
|
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as jump: the interrupt check can run interrupts, so
 * leafness follows leafness_of_check_ints. */
|
2020-12-17 09:08:04 +03:00
|
|
|
// attr bool leaf = leafness_of_check_ints; /* has rb_threadptr_execute_interrupts() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
|
|
|
if (!RTEST(val)) {
|
2017-11-06 10:44:28 +03:00
|
|
|
/* interrupts are polled only when the branch is actually taken */
RUBY_VM_CHECK_INTS(ec);
|
2007-01-16 11:52:22 +03:00
|
|
|
JUMP(dst);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* if val is nil (NIL_P(val) holds), set PC to (PC + dst).
 * Otherwise the body does nothing. val is popped either way. */
|
2015-10-22 09:30:12 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
branchnil
|
|
|
|
(OFFSET dst)
|
|
|
|
(VALUE val)
|
|
|
|
()
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as jump: the interrupt check can run interrupts, so
 * leafness follows leafness_of_check_ints. */
|
2020-12-17 09:08:04 +03:00
|
|
|
// attr bool leaf = leafness_of_check_ints; /* has rb_threadptr_execute_interrupts() */
|
2015-10-22 09:30:12 +03:00
|
|
|
{
|
|
|
|
if (NIL_P(val)) {
|
2017-11-06 10:44:28 +03:00
|
|
|
/* interrupts are polled only when the branch is actually taken */
RUBY_VM_CHECK_INTS(ec);
|
2015-10-22 09:30:12 +03:00
|
|
|
JUMP(dst);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-01-16 11:52:22 +03:00
|
|
|
/**********************************************************/
|
|
|
|
/* for optimize */
|
|
|
|
/**********************************************************/
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* run iseq only once. The work is delegated to vm_once_dispatch(), and
 * its result becomes the instruction's return value val.
 * NOTE(review): presumably `ise` is the storage that remembers the first
 * result so later executions can skip running iseq -- confirm in
 * vm_once_dispatch(). */
|
2013-08-20 21:41:13 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
once
|
2018-03-19 21:21:54 +03:00
|
|
|
(ISEQ iseq, ISE ise)
|
2013-08-20 21:41:13 +04:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2018-03-19 21:21:54 +03:00
|
|
|
val = vm_once_dispatch(ec, iseq, ise);
|
2013-08-20 21:41:13 +04:00
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* case dispatcher, jump by table if possible */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_case_dispatch
|
|
|
|
(CDHASH hash, OFFSET else_offset)
|
|
|
|
(..., VALUE key)
|
2018-01-12 11:38:07 +03:00
|
|
|
()
|
|
|
|
// attr rb_snum_t sp_inc = -1;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
OFFSET dst = vm_case_dispatch(hash, else_offset, key);
|
|
|
|
|
|
|
|
if (dst) {
|
|
|
|
JUMP(dst);
|
2009-08-12 09:55:06 +04:00
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/** simple functions */
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X+Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_plus
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_plus(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:57:19 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X-Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_minus
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_minus(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X*Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_mult
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_mult(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X/Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_div
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
2019-02-01 09:29:02 +03:00
|
|
|
/* In case of division by zero, it raises. Thus
|
|
|
|
* ZeroDivisionError#initialize is called. */
|
|
|
|
// attr bool leaf = false;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_div(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X%Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_mod
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
2020-12-26 06:17:16 +03:00
|
|
|
/* Same discussion as opt_div. */
|
2019-02-01 09:29:02 +03:00
|
|
|
// attr bool leaf = false;
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_mod(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X==Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_eq
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2020-05-29 11:42:23 +03:00
|
|
|
val = opt_equality(GET_ISEQ(), recv, obj, cd);
|
2007-01-16 11:52:22 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
2007-12-18 15:07:51 +03:00
|
|
|
}
|
2007-01-16 11:52:22 +03:00
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X!=Y. */
|
2007-12-18 15:07:51 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_neq
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd_eq, CALL_DATA cd)
|
2007-12-18 15:07:51 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
val = vm_opt_neq(GET_ISEQ(), cd, cd_eq, recv, obj);
|
2007-12-18 15:07:51 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X<Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_lt
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_lt(recv, obj);
|
2007-01-16 11:52:22 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X<=Y. */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_le
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_le(recv, obj);
|
2007-01-16 11:52:22 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X>Y. */
|
2007-05-21 08:46:51 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_gt
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-05-21 08:46:51 +04:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_gt(recv, obj);
|
2007-05-21 08:46:51 +04:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-05-21 08:46:51 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized X>=Y. */
|
2007-05-21 08:46:51 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_ge
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-05-21 08:46:51 +04:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_ge(recv, obj);
|
2007-05-21 08:46:51 +04:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-05-21 08:46:51 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* << */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_ltlt
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
2019-02-01 09:29:02 +03:00
|
|
|
/* This instruction can append an integer, as a codepoint, into a
|
|
|
|
* string. Then what happens if that codepoint does not exist in the
|
|
|
|
* string's encoding? Of course an exception. That's not a leaf. */
|
|
|
|
// attr bool leaf = false; /* has "invalid codepoint" exception */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_ltlt(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 05:38:45 +03:00
|
|
|
/* optimized X&Y. */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_and
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2018-09-26 05:38:45 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
val = vm_opt_and(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-26 05:38:45 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* optimized X|Y. */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_or
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2018-09-26 05:38:45 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
|
|
|
val = vm_opt_or(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-26 05:38:45 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* [] */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_aref
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* This is complicated. In case of hash, vm_opt_aref() resorts to
|
|
|
|
* rb_hash_aref(). If `recv` has no `obj`, this function then yields
|
|
|
|
* default_proc. This is a method call. So opt_aref is
|
|
|
|
* (surprisingly) not leaf. */
|
|
|
|
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_aref(recv, obj);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* recv[obj] = set */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_aset
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv, VALUE obj, VALUE set)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* This is a different story from opt_aref. Here vm_opt_aset() resorts
|
|
|
|
* to rb_hash_aset(), which should call #hash for `obj`. */
|
|
|
|
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_aset(recv, obj, set);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* recv[str] = set */
|
2014-01-10 08:54:08 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_aset_with
|
2019-07-31 04:36:05 +03:00
|
|
|
(VALUE key, CALL_DATA cd)
|
2014-01-10 08:54:08 +04:00
|
|
|
(VALUE recv, VALUE val)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as opt_aset. */
|
|
|
|
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
|
2014-01-10 08:54:08 +04:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
VALUE tmp = vm_opt_aset_with(recv, key, val);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (!UNDEF_P(tmp)) {
|
2017-04-18 14:06:58 +03:00
|
|
|
val = tmp;
|
2014-01-25 07:15:30 +04:00
|
|
|
}
|
|
|
|
else {
|
2018-07-19 16:25:22 +03:00
|
|
|
TOPN(0) = rb_str_resurrect(key);
|
|
|
|
PUSH(val);
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2014-01-10 08:54:08 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* recv[str] */
|
2014-01-10 08:54:08 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_aref_with
|
2019-07-31 04:36:05 +03:00
|
|
|
(VALUE key, CALL_DATA cd)
|
2014-01-10 08:54:08 +04:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
2018-09-11 12:48:58 +03:00
|
|
|
/* Same discussion as opt_aref. */
|
|
|
|
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
|
2014-01-10 08:54:08 +04:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_aref_with(recv, key);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-07-19 16:25:22 +03:00
|
|
|
PUSH(rb_str_resurrect(key));
|
2018-09-14 10:57:19 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2014-01-10 08:54:08 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized length */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_length
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_length(recv, BOP_LENGTH);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized size */
|
2009-09-06 12:39:57 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_size
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2009-09-06 12:39:57 +04:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_length(recv, BOP_SIZE);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2009-09-06 12:39:57 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized empty? */
|
2012-09-26 13:34:46 +04:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_empty_p
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2012-09-26 13:34:46 +04:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_empty_p(recv);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2012-09-26 13:34:46 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized succ */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_succ
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make room for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinarily smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error margin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_succ(recv);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized not */
|
2007-12-18 15:07:51 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_not
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-12-18 15:07:51 +03:00
|
|
|
(VALUE recv)
|
|
|
|
(VALUE val)
|
|
|
|
{
|
2020-01-08 10:14:01 +03:00
|
|
|
val = vm_opt_not(GET_ISEQ(), cd, recv);
|
2015-09-19 20:59:58 +03:00
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-12-18 15:07:51 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* optimized regexp match 2
 *
 * Operand: call data `cd`.  Pops `obj2` and `obj1`, pushes `val`.
 * The result is first computed by vm_opt_regexpmatch2(obj2, obj1).
 * When UNDEF_P(val) holds, the instruction falls back to
 * CALL_SIMPLE_METHOD().  The instruction is declared non-leaf by the
 * `attr bool leaf = false` pragma below.
 */
|
2007-01-16 11:52:22 +03:00
|
|
|
DEFINE_INSN
|
|
|
|
opt_regexpmatch2
|
2019-07-31 04:36:05 +03:00
|
|
|
(CALL_DATA cd)
|
2007-01-16 11:52:22 +03:00
|
|
|
(VALUE obj2, VALUE obj1)
|
|
|
|
(VALUE val)
|
2019-02-19 18:39:35 +03:00
|
|
|
// attr bool leaf = false; /* match_at() has rb_thread_check_ints() */
|
2007-01-16 11:52:22 +03:00
|
|
|
{
|
split insns.def into functions
Contemporary C compilers are good at function inlining. They fold
multiple functions into one. However they are not yet smart enough to
unfold a function into several ones. So generally speaking, it is
wiser for a C programmer to manually split C functions whenever
possible. That should make rooms for compilers to optimize at will.
Before this changeset insns.def was converted into single HUGE
function called vm_exec_core(). By moving each instruction's core
into individual functions, generated C source code is reduced from
3,428 lines to 2,847 lines. Looking at the generated assembly
however, it seems my compiler (gcc 6.2) is extraordinary smart so that
it inlines almost all functions I introduced in this changeset back
into that vm_exec_core. On my machine compiled machine binary of the
function does not shrink very much in size (28,432 bytes to 26,816
bytes, according to nm(1)).
I believe this change is zero-cost. Several benchmarks I exercised
showed no significant difference beyond error mergin. For instance
3 repeated runs of optcarrot benchmark on my machine resulted in:
before this: 28.330329285707490, 27.513378371065920, 29.40420215754537
after this: 27.107195867280414, 25.549324021385907, 30.31581919050884
in fps (greater==faster).
----
* internal.h (rb_obj_not_equal): used from vm_insnhelper.c
* insns.def: move vast majority of lines into vm_insnhelper.c
* vm_insnhelper.c: moved here.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2017-04-18 13:58:49 +03:00
|
|
|
val = vm_opt_regexpmatch2(obj2, obj1);
|
|
|
|
|
2024-01-30 08:48:59 +03:00
|
|
|
if (UNDEF_P(val)) {
|
2018-09-14 10:44:44 +03:00
|
|
|
CALL_SIMPLE_METHOD();
|
2007-01-16 11:52:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-07 10:58:00 +03:00
|
|
|
/* call specific function with args
 *
 * Operand: builtin descriptor `bf`.  Pops a variable number of values
 * (`...`) and pushes `val`.
 * `val` is the result of vm_invoke_builtin(), which receives
 * STACK_ADDR_FROM_TOP(bf->argc) as its last argument (presumably the
 * address of the first of the bf->argc topmost stack slots -- confirm
 * in the macro).
 * Because the pop count is variable, the stack pointer change is given
 * explicitly by the `sp_inc` pragma as 1 - bf->argc.
 */
|
|
|
|
DEFINE_INSN
|
|
|
|
invokebuiltin
|
|
|
|
(RB_BUILTIN bf)
|
|
|
|
(...)
|
2020-07-09 15:43:42 +03:00
|
|
|
(VALUE val)
|
2019-11-07 10:58:00 +03:00
|
|
|
// attr bool leaf = false; /* anything can happen inside */
|
|
|
|
// attr rb_snum_t sp_inc = 1 - bf->argc;
|
|
|
|
{
|
2020-07-09 15:43:42 +03:00
|
|
|
val = vm_invoke_builtin(ec, reg_cfp, bf, STACK_ADDR_FROM_TOP(bf->argc));
|
2019-11-07 10:58:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* call specific function with args (same parameters)
 *
 * Operands: builtin descriptor `bf` and `index`.  Pops nothing and
 * pushes `val`.
 * `val` is the result of vm_invoke_builtin_delegate(), to which `index`
 * is passed after narrowing to unsigned int.
 * NOTE(review): `index` is presumably the start offset into the
 * method's local variable table from which the builtin's arguments are
 * taken -- confirm in vm_invoke_builtin_delegate().
 */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_invokebuiltin_delegate
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
(RB_BUILTIN bf, rb_num_t index)
|
2019-11-07 10:58:00 +03:00
|
|
|
()
|
2020-07-09 15:43:42 +03:00
|
|
|
(VALUE val)
|
2019-11-07 10:58:00 +03:00
|
|
|
// attr bool leaf = false; /* anything can happen inside */
|
|
|
|
{
|
2020-07-09 15:43:42 +03:00
|
|
|
val = vm_invoke_builtin_delegate(ec, reg_cfp, bf, (unsigned int)index);
|
2019-11-07 10:58:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* call specific function with args (same parameters) and leave
 *
 * Operands: builtin descriptor `bf` and `index`.  Pops nothing and
 * pushes `val`.
 * `val` is the result of vm_invoke_builtin_delegate(), exactly as in
 * opt_invokebuiltin_delegate.  The current frame is then popped with
 * vm_pop_frame():
 *   - if it returns non-zero, the enclosing function returns at once;
 *     with OPT_CALL_THREADED_CODE the value is stored in the thread's
 *     `retval` and 0 is returned, otherwise `val` itself is returned;
 *   - if it returns zero, RESTORE_REGS() is invoked and execution
 *     continues.
 */
|
|
|
|
DEFINE_INSN
|
|
|
|
opt_invokebuiltin_delegate_leave
|
vm_invoke_builtin_delegate with start index.
opt_invokebuiltin_delegate and opt_invokebuiltin_delegate_leave
invokes builtin functions with same parameters of the method.
This technique eliminate stack push operations. However, delegation
parameters should be completely same as given parameters.
(e.g. `def foo(a, b, c) __builtin_foo(a, b, c)` is okay, but
__builtin_foo(b, c) is not allowed)
This patch relaxes this restriction. ISeq has a local variables
table which includes parameters. For example, the method defined
as `def foo(a, b, c) x=y=nil`, then local variables table contains
[a, b, c, x, y]. If calling builtin-function with arguments which
are sub-array of the lvar table, use opt_invokebuiltin_delegate
instruction with start index. For example, `__builtin_foo(b, c)`,
`__builtin_bar(c, x, y)` is okay, and so on.
2019-11-15 11:49:49 +03:00
|
|
|
(RB_BUILTIN bf, rb_num_t index)
|
2019-11-07 10:58:00 +03:00
|
|
|
()
|
|
|
|
(VALUE val)
|
|
|
|
// attr bool leaf = false; /* anything can happen inside */
|
|
|
|
{
|
2019-11-18 04:36:48 +03:00
|
|
|
val = vm_invoke_builtin_delegate(ec, reg_cfp, bf, (unsigned int)index);
|
2019-11-07 10:58:00 +03:00
|
|
|
|
|
|
|
|
|
/* leave fastpath */
|
2020-06-24 09:41:57 +03:00
|
|
|
/* TracePoint/return makes this insn fall back to opt_invokebuiltin_delegate */
|
2019-11-07 10:58:00 +03:00
|
|
|
if (vm_pop_frame(ec, GET_CFP(), GET_EP())) {
|
|
|
|
#if OPT_CALL_THREADED_CODE
|
|
|
|
rb_ec_thread_ptr(ec)->retval = val;
|
|
|
|
return 0;
|
|
|
|
#else
|
|
|
|
return val;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
RESTORE_REGS();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-26 04:11:20 +03:00
|
|
|
/* BLT
 *
 * Joke instruction, defined only when SUPPORT_JOKE is enabled
 * (DEFINE_INSN_IF).  Takes no operands, pops nothing, and pushes `ret`,
 * a new string created by rb_str_new2() from a fixed literal.
 */
|
2019-09-02 20:51:48 +03:00
|
|
|
DEFINE_INSN_IF(SUPPORT_JOKE)
|
2018-09-26 04:11:20 +03:00
|
|
|
bitblt
|
|
|
|
()
|
|
|
|
()
|
|
|
|
(VALUE ret)
|
|
|
|
{
|
|
|
|
ret = rb_str_new2("a bit of bacon, lettuce and tomato");
|
|
|
|
}
|
|
|
|
|
2018-01-12 11:38:07 +03:00
|
|
|
/* The Answer to Life, the Universe, and Everything
 *
 * Joke instruction, defined only when SUPPORT_JOKE is enabled
 * (DEFINE_INSN_IF).  Takes no operands, pops nothing, and pushes `ret`,
 * the value INT2FIX(42).
 */
|
2019-09-02 20:51:48 +03:00
|
|
|
DEFINE_INSN_IF(SUPPORT_JOKE)
|
2007-01-16 11:52:22 +03:00
|
|
|
answer
|
|
|
|
()
|
|
|
|
()
|
|
|
|
(VALUE ret)
|
|
|
|
{
|
|
|
|
ret = INT2FIX(42);
|
|
|
|
}
|