insns.def: refactor to avoid CALL_METHOD macro

These send and its variant instructions are the most frequently called
paths in the entire process.  Reducing macro expansions to make them
dedicated function called vm_sendish() is the main goal of this
changeset.  It reduces the size of vm_exec_coref from 25,552 bytes to
23,728 bytes on my machine.

I see no significant slowdown.

Fix: [GH-2056]

vanilla: ruby 2.6.0dev (2018-12-19 trunk 66449) [x86_64-darwin15]
ours: ruby 2.6.0dev (2018-12-19 refactor-send 66449) [x86_64-darwin15]
last_commit=insns.def: refactor to avoid CALL_METHOD macro
Calculating -------------------------------------
                         vanilla        ours
   vm2_defined_method     2.645M      2.823M i/s -      6.000M times in 5.109888s 4.783254s
           vm2_method     8.553M      8.873M i/s -      6.000M times in 1.579892s 1.524026s
   vm2_method_missing     3.772M      3.858M i/s -      6.000M times in 3.579482s 3.499220s
vm2_method_with_block     8.494M      8.944M i/s -      6.000M times in 1.589774s 1.509463s
      vm2_poly_method      0.571       0.607 i/s -       1.000 times in 3.947570s 3.733528s
   vm2_poly_method_ov      5.514       5.168 i/s -       1.000 times in 0.408156s 0.436169s
 vm3_clearmethodcache      2.875       2.837 i/s -       1.000 times in 0.783018s 0.793493s

Comparison:
                vm2_defined_method
                 ours:   2822555.4 i/s
              vanilla:   2644878.1 i/s - 1.07x  slower

                        vm2_method
                 ours:   8872947.8 i/s
              vanilla:   8553433.1 i/s - 1.04x  slower

                vm2_method_missing
                 ours:   3858192.3 i/s
              vanilla:   3772296.3 i/s - 1.02x  slower

             vm2_method_with_block
                 ours:   8943825.1 i/s
              vanilla:   8493955.0 i/s - 1.05x  slower

                   vm2_poly_method
                 ours:         0.6 i/s
              vanilla:         0.6 i/s - 1.06x  slower

                vm2_poly_method_ov
              vanilla:         5.5 i/s
                 ours:         5.2 i/s - 1.07x  slower

              vm3_clearmethodcache
              vanilla:         2.9 i/s
                 ours:         2.8 i/s - 1.01x  slower



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
shyouhei 2018-12-26 00:59:37 +00:00
Родитель 686881d383
Коммит d46ab95376
7 изменённых файлов: 146 добавлений и 69 удалений

Просмотреть файл

@ -740,12 +740,13 @@ send
(VALUE val)
// attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
{
struct rb_calling_info calling;
VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, false);
val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, FALSE);
calling.recv = TOPN(calling.argc = ci->orig_argc);
vm_search_method(ci, cc, calling.recv);
CALL_METHOD(&calling, ci, cc);
if (val == Qundef) {
RESTORE_REGS();
NEXT_INSN();
}
}
/* Invoke method without block */
@ -757,10 +758,13 @@ opt_send_without_block
// attr bool handles_sp = true;
// attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
{
struct rb_calling_info calling;
calling.block_handler = VM_BLOCK_HANDLER_NONE;
vm_search_method(ci, cc, calling.recv = TOPN(calling.argc = ci->orig_argc));
CALL_METHOD(&calling, ci, cc);
VALUE bh = VM_BLOCK_HANDLER_NONE;
val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_method_wrap);
if (val == Qundef) {
RESTORE_REGS();
NEXT_INSN();
}
}
DEFINE_INSN
@ -826,12 +830,13 @@ invokesuper
(VALUE val)
// attr rb_snum_t sp_inc = sp_inc_of_sendish(ci);
{
struct rb_calling_info calling;
VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), ci, blockiseq, true);
val = vm_sendish(ec, GET_CFP(), ci, cc, bh, vm_search_super_method);
calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, ci, blockiseq, TRUE);
calling.recv = TOPN(calling.argc = ci->orig_argc);
vm_search_super_method(ec, GET_CFP(), &calling, ci, cc);
CALL_METHOD(&calling, ci, cc);
if (val == Qundef) {
RESTORE_REGS();
NEXT_INSN();
}
}
/* yield(args) */
@ -843,21 +848,16 @@ invokeblock
// attr bool handles_sp = true;
// attr rb_snum_t sp_inc = sp_inc_of_invokeblock(ci);
{
struct rb_calling_info calling;
VALUE block_handler;
static struct rb_call_cache cc = {
0, 0, NULL, vm_invokeblock_i,
};
calling.argc = ci->orig_argc;
calling.block_handler = VM_BLOCK_HANDLER_NONE;
calling.recv = Qundef; /* should not be used */
VALUE bh = VM_BLOCK_HANDLER_NONE;
val = vm_sendish(ec, GET_CFP(), ci, &cc, bh, vm_search_invokeblock);
block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
if (block_handler == VM_BLOCK_HANDLER_NONE) {
rb_vm_localjump_error("no block given (yield)", Qnil, 0);
}
val = vm_invoke_block(ec, GET_CFP(), &calling, ci, block_handler);
if (val == Qundef) {
EXEC_EC_CFP(val);
RESTORE_REGS();
NEXT_INSN();
}
}

Просмотреть файл

@ -81,6 +81,6 @@
% end
%
% # compiler: If insn returns (leave) or does longjmp (throw), the branch should no longer be compiled. TODO: create attr for it?
% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\sRESTORE_REGS\(\);/
% if insn.expr.expr =~ /\sTHROW_EXCEPTION\([^)]+\);/ || insn.expr.expr =~ /\bvm_pop_frame\(/
b->finish_p = TRUE;
% end

Просмотреть файл

@ -29,6 +29,12 @@
% #endif
% RESTORE_REGS
% end
% expr.gsub!(/^(?<indent>\s*)NEXT_INSN\(\);\n/) do
% indent = Regexp.last_match[:indent]
% <<-end.gsub(/^ +/, '')
% #{indent}UNREACHABLE_RETURN(Qundef);
% end
% end
% end
% end
%

13
vm.c
Просмотреть файл

@ -28,6 +28,17 @@
VALUE rb_str_concat_literals(size_t, const VALUE*);
/* :FIXME: This #ifdef is because we build pch in case of mswin and
* not in case of other situations. That distinction might change in
* a future. We would better make it detectable in something better
* than just _MSC_VER. */
#ifdef _MSC_VER
RUBY_FUNC_EXPORTED
#else
MJIT_FUNC_EXPORTED
#endif
VALUE vm_exec(rb_execution_context_t *, int);
PUREFUNC(static inline const VALUE *VM_EP_LEP(const VALUE *));
static inline const VALUE *
VM_EP_LEP(const VALUE *ep)
@ -1870,7 +1881,7 @@ static inline VALUE
vm_exec_handle_exception(rb_execution_context_t *ec, enum ruby_tag_type state,
VALUE errinfo, VALUE *initial);
MJIT_FUNC_EXPORTED VALUE
VALUE
vm_exec(rb_execution_context_t *ec, int mjit_enable_p)
{
enum ruby_tag_type state;

Просмотреть файл

@ -875,7 +875,7 @@ refine_sym_proc_call(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, callback_arg))
static VALUE
vm_caller_setup_arg_block(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
const struct rb_call_info *ci, rb_iseq_t *blockiseq, const int is_super)
const struct rb_call_info *ci, const rb_iseq_t *blockiseq, const int is_super)
{
if (ci->flag & VM_CALL_ARGS_BLOCKARG) {
VALUE block_code = *(--reg_cfp->sp);

Просмотреть файл

@ -2444,8 +2444,7 @@ vm_super_outside(void)
}
static void
vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
struct rb_calling_info *calling, struct rb_call_info *ci, struct rb_call_cache *cc)
vm_search_super_method(const rb_control_frame_t *reg_cfp, struct rb_call_info *ci, struct rb_call_cache *cc, VALUE recv)
{
VALUE current_defined_class, klass;
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(reg_cfp);
@ -2463,14 +2462,14 @@ vm_search_super_method(const rb_execution_context_t *ec, rb_control_frame_t *reg
if (BUILTIN_TYPE(current_defined_class) != T_MODULE &&
BUILTIN_TYPE(current_defined_class) != T_ICLASS && /* bound UnboundMethod */
!FL_TEST(current_defined_class, RMODULE_INCLUDED_INTO_REFINEMENT) &&
!rb_obj_is_kind_of(calling->recv, current_defined_class)) {
!rb_obj_is_kind_of(recv, current_defined_class)) {
VALUE m = RB_TYPE_P(current_defined_class, T_ICLASS) ?
RBASIC(current_defined_class)->klass : current_defined_class;
rb_raise(rb_eTypeError,
"self has wrong type to call super in this context: "
"%"PRIsVALUE" (expected %"PRIsVALUE")",
rb_obj_class(calling->recv), m);
rb_obj_class(recv), m);
}
if (me->def->type == VM_METHOD_TYPE_BMETHOD && (ci->flag & VM_CALL_ZSUPER)) {
@ -3187,6 +3186,103 @@ vm_find_or_create_class_by_id(ID id,
}
}
static void
vm_search_method_wrap(
const struct rb_control_frame_struct *reg_cfp,
struct rb_call_info *ci,
struct rb_call_cache *cc,
VALUE recv)
{
vm_search_method(ci, cc, recv);
}
static void
vm_search_invokeblock(
const struct rb_control_frame_struct *reg_cfp,
struct rb_call_info *ci,
struct rb_call_cache *cc,
VALUE recv)
{
/* Does nothing. */
}
static VALUE
vm_invokeblock_i(
struct rb_execution_context_struct *ec,
struct rb_control_frame_struct *reg_cfp,
struct rb_calling_info *calling,
const struct rb_call_info *ci,
struct rb_call_cache *cc)
{
VALUE block_handler = VM_CF_BLOCK_HANDLER(GET_CFP());
if (block_handler == VM_BLOCK_HANDLER_NONE) {
rb_vm_localjump_error("no block given (yield)", Qnil, 0);
}
else {
return vm_invoke_block(ec, GET_CFP(), calling, ci, block_handler);
}
}
static VALUE
vm_sendish(
struct rb_execution_context_struct *ec,
struct rb_control_frame_struct *reg_cfp,
struct rb_call_info *ci,
struct rb_call_cache *cc,
VALUE block_handler,
void (*method_explorer)(
const struct rb_control_frame_struct *reg_cfp,
struct rb_call_info *ci,
struct rb_call_cache *cc,
VALUE recv))
{
VALUE val;
int argc = ci->orig_argc;
VALUE recv = TOPN(argc);
struct rb_calling_info calling;
calling.block_handler = block_handler;
calling.recv = recv;
calling.argc = argc;
method_explorer(GET_CFP(), ci, cc, recv);
val = cc->call(ec, GET_CFP(), &calling, ci, cc);
if (val != Qundef) {
return val; /* CFUNC normal return */
}
else {
RESTORE_REGS(); /* CFP pushed in cc->call() */
}
#ifdef MJIT_HEADER
/* When calling ISeq which may catch an exception from JIT-ed
code, we should not call mjit_exec directly to prevent the
caller frame from being canceled. That's because the caller
frame may have stack values in the local variables and the
cancelling the caller frame will purge them. But directly
calling mjit_exec is faster... */
if (GET_ISEQ()->body->catch_except_p) {
VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
return vm_exec(ec, true);
}
else if ((val = mjit_exec(ec)) == Qundef) {
VM_ENV_FLAGS_SET(GET_EP(), VM_FRAME_FLAG_FINISH);
return vm_exec(ec, false);
}
else {
return val;
}
#else
/* When calling from VM, longjmp in the callee won't purge any
JIT-ed caller frames. So it's safe to directly call
mjit_exec. */
return mjit_exec(ec);
#endif
}
static VALUE
vm_opt_str_freeze(VALUE str, int bop, ID id)
{

Просмотреть файл

@ -137,42 +137,6 @@ enum vm_regan_acttype {
/* deal with control flow 2: method/iterator */
/**********************************************************/
#ifdef MJIT_HEADER
/* When calling ISeq which may catch an exception from JIT-ed code, we should not call
mjit_exec directly to prevent the caller frame from being canceled. That's because
the caller frame may have stack values in the local variables and the cancelling
the caller frame will purge them. But directly calling mjit_exec is faster... */
#define EXEC_EC_CFP(val) do { \
if (ec->cfp->iseq->body->catch_except_p) { \
VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
val = vm_exec(ec, TRUE); \
} \
else if ((val = mjit_exec(ec)) == Qundef) { \
VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); \
val = vm_exec(ec, FALSE); \
} \
} while (0)
#else
/* When calling from VM, longjmp in the callee won't purge any JIT-ed caller frames.
So it's safe to directly call mjit_exec. */
#define EXEC_EC_CFP(val) do { \
if ((val = mjit_exec(ec)) == Qundef) { \
RESTORE_REGS(); \
NEXT_INSN(); \
} \
} while (0)
#endif
#define CALL_METHOD(calling, ci, cc) do { \
VALUE v = (*(cc)->call)(ec, GET_CFP(), (calling), (ci), (cc)); \
if (v == Qundef) { \
EXEC_EC_CFP(val); \
} \
else { \
val = v; \
} \
} while (0)
/* set fastpath when cached method is *NOT* protected
* because inline method cache does not care about receiver.
*/