mjit_compile.c: resurrect local variable stack

This optimization was reverted on r63863, but this commit resurrects the
optimization to skip some sp motions on JIT execution.

tool/ruby_vm/views/_mjit_compile_insn_body.erb: ditto
tool/ruby_vm/views/_mjit_compile_insn.erb: ditto

insns.def: resurrect handles_frame as handles_stack, which was deleted
on r63763.
tool/ruby_vm/models/bare_instructions.rb: ditto

vm_insnhelper.c: prevent moving sp outside insns.def to allow modifying
it by JIT.

* Optcarrot benchmark

$ benchmark-driver benchmark.yml --rbenv 'before --jit;after --jit' --repeat-count 12 -v
before --jit: ruby 2.6.0dev (2018-07-17 trunk 63987) +JIT [x86_64-linux]
after --jit: ruby 2.6.0dev (2018-07-17 local-stack 63987) +JIT [x86_64-linux]
last_commit=mjit_compile.c: resurrect local variable stack
Calculating -------------------------------------
                         before --jit  after --jit
Optcarrot Lan_Master.nes       70.518       72.144 fps

Comparison:
             Optcarrot Lan_Master.nes
             after --jit:        72.1 fps
            before --jit:        70.5 fps - 1.02x  slower

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63988 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
k0kubun 2018-07-17 15:09:41 +00:00
Родитель e749134f0a
Коммит 6a4bb345df
9 изменённых файлов: 112 добавлений и 33 удалений

Просмотреть файл

@ -43,6 +43,9 @@
* sp_inc: Used to dynamically calculate sp increase in
`insn_stack_increase`.
* handles_stack: If this is true, JIT makes sure that values are
set to VM stack.
- Attributes can access operands, but not stack (push/pop) variables.
- An instruction's body is a pure C block, copied verbatim into
@ -451,7 +454,7 @@ expandarray
(...)
// attr rb_snum_t sp_inc = num - 1 + (flag & 1 ? 1 : 0);
{
vm_expandarray(GET_CFP(), ary, num, (int)flag);
INC_SP(vm_expandarray(GET_SP(), ary, num, (int)flag));
}
/* concat two arrays */
@ -690,6 +693,7 @@ defineclass
(ID id, ISEQ class_iseq, rb_num_t flags)
(VALUE cbase, VALUE super)
(VALUE val)
// attr bool handles_stack = true;
{
VALUE klass = vm_find_or_create_class_by_id(id, flags, cbase, super);
@ -716,6 +720,7 @@ send
(CALL_INFO ci, CALL_CACHE cc, ISEQ blockiseq)
(...)
(VALUE val)
// attr bool handles_stack = true;
// attr rb_snum_t sp_inc = - (int)(ci->orig_argc + ((ci->flag & VM_CALL_ARGS_BLOCKARG) ? 1 : 0));
{
struct rb_calling_info calling;
@ -771,6 +776,7 @@ opt_send_without_block
(CALL_INFO ci, CALL_CACHE cc)
(...)
(VALUE val)
// attr bool handles_stack = true;
// attr rb_snum_t sp_inc = -ci->orig_argc;
{
struct rb_calling_info calling;
@ -785,6 +791,7 @@ invokesuper
(CALL_INFO ci, CALL_CACHE cc, ISEQ blockiseq)
(...)
(VALUE val)
// attr bool handles_stack = true;
// attr rb_snum_t sp_inc = - (int)(ci->orig_argc + ((ci->flag & VM_CALL_ARGS_BLOCKARG) ? 1 : 0));
{
struct rb_calling_info calling;
@ -802,6 +809,7 @@ invokeblock
(CALL_INFO ci)
(...)
(VALUE val)
// attr bool handles_stack = true;
// attr rb_snum_t sp_inc = 1 - ci->orig_argc;
{
struct rb_calling_info calling;
@ -828,6 +836,7 @@ leave
()
(VALUE val)
(VALUE val)
// attr bool handles_stack = true;
{
if (OPT_CHECKED_RUN) {
const VALUE *const bp = vm_base_ptr(reg_cfp);
@ -989,9 +998,9 @@ opt_plus
val = vm_opt_plus(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1008,9 +1017,9 @@ opt_minus
val = vm_opt_minus(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1027,9 +1036,9 @@ opt_mult
val = vm_opt_mult(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1046,9 +1055,9 @@ opt_div
val = vm_opt_div(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1065,9 +1074,9 @@ opt_mod
val = vm_opt_mod(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1084,9 +1093,9 @@ opt_eq
val = opt_eq_func(recv, obj, ci, cc);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1103,9 +1112,9 @@ opt_neq
val = vm_opt_neq(ci, cc, ci_eq, cc_eq, recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1122,9 +1131,9 @@ opt_lt
val = vm_opt_lt(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1141,9 +1150,9 @@ opt_le
val = vm_opt_le(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1160,9 +1169,9 @@ opt_gt
val = vm_opt_gt(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1179,9 +1188,9 @@ opt_ge
val = vm_opt_ge(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1198,9 +1207,9 @@ opt_ltlt
val = vm_opt_ltlt(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1217,9 +1226,9 @@ opt_aref
val = vm_opt_aref(recv, obj);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1236,10 +1245,10 @@ opt_aset
val = vm_opt_aset(recv, obj, set);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(obj);
PUSH(set);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1259,12 +1268,12 @@ opt_aset_with
val = tmp;
}
else {
#ifndef MJIT_HEADER
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(rb_str_resurrect(key));
#endif
PUSH(val);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1281,8 +1290,8 @@ opt_aref_with
val = vm_opt_aref_with(recv, key);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
PUSH(rb_str_resurrect(key));
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
@ -1300,8 +1309,8 @@ opt_length
val = vm_opt_length(recv, BOP_LENGTH);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1318,8 +1327,8 @@ opt_size
val = vm_opt_length(recv, BOP_SIZE);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1336,8 +1345,8 @@ opt_empty_p
val = vm_opt_empty_p(recv);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1354,8 +1363,8 @@ opt_succ
val = vm_opt_succ(recv);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1372,8 +1381,8 @@ opt_not
val = vm_opt_not(ci, cc, recv);
if (val == Qundef) {
PUSH(recv);
#ifndef MJIT_HEADER
PUSH(recv);
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1400,9 +1409,9 @@ opt_regexpmatch2
val = vm_opt_regexpmatch2(obj2, obj1);
if (val == Qundef) {
#ifndef MJIT_HEADER
PUSH(obj2);
PUSH(obj1);
#ifndef MJIT_HEADER
ADD_PC(-WIDTH_OF_opt_send_without_block);
#endif
DISPATCH_ORIGINAL_INSN(opt_send_without_block);
@ -1415,6 +1424,7 @@ opt_call_c_function
(rb_insn_func_t funcptr)
()
()
// attr bool handles_stack = true;
{
reg_cfp = (funcptr)(ec, reg_cfp);

Просмотреть файл

@ -24,6 +24,9 @@
/* State tracked by mjit_compile while generating C code for one ISeq body. */
struct compile_status {
int success; /* TRUE while compilation has had no issue */
int *stack_size_for_pos; /* stack_size_for_pos[pos] has stack size for the position (otherwise -1) */
/* If TRUE, JIT-ed code will use local variables to store pushed values instead of
using VM's stack and moving stack pointer. mjit_compile sets this to
!body->catch_except_p. */
int local_stack_p;
};
/* Storage to keep data which is consistent in each conditional branch.
@ -172,7 +175,13 @@ compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int st
/* Emit the `cancel:` label that generated code jumps to (via `goto cancel`)
   when it has to bail out of the JIT-ed function.

   f:      output stream receiving the generated C source.
   body:   ISeq body being compiled; body->stack_max bounds the copy loop.
   status: compilation state; only status->local_stack_p is read here.

   When status->local_stack_p is set, the pushed values live in the generated
   function's local `stack[]` array, so the emitted code first copies all
   body->stack_max slots back to the VM stack (slot i goes to
   reg_cfp->bp + i + 1).  The emitted handler then returns Qundef. */
static void
compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status)
{
    unsigned int i;

    fprintf(f, "\ncancel:\n");
    if (status->local_stack_p) {
        for (i = 0; i < body->stack_max; i++) {
            /* `i` and `i + 1` are unsigned int, so they must be printed with %u, not %d. */
            fprintf(f, " *((VALUE *)reg_cfp->bp + %u) = stack[%u];\n", i + 1, i);
        }
    }
    fprintf(f, " return Qundef;\n");
}
@ -182,6 +191,7 @@ mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *func
{
struct compile_status status;
status.success = TRUE;
status.local_stack_p = !body->catch_except_p;
status.stack_size_for_pos = ALLOC_N(int, body->iseq_size);
memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size);
@ -195,7 +205,12 @@ mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *func
fprintf(f, "__declspec(dllexport)\n");
#endif
fprintf(f, "VALUE\n%s(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", funcname);
if (status.local_stack_p) {
fprintf(f, " VALUE stack[%d];\n", body->stack_max);
}
else {
fprintf(f, " VALUE *stack = reg_cfp->sp;\n");
}
fprintf(f, " static const VALUE *const original_body_iseq = (VALUE *)0x%"PRIxVALUE";\n",
(VALUE)body->iseq_encoded);

Просмотреть файл

@ -101,6 +101,10 @@ class RubyVM::BareInstructions
}.join
end
# Whether this instruction's `handles_stack` attribute is enabled.
# Returns true unless the attribute's expression text contains the
# standalone word `false` or `0`.
def handles_stack?
/\b(false|0)\b/ !~ @attrs['handles_stack'].expr.expr
end
def inspect
sprintf "#<%s %s@%s:%d>", self.class.name, @name, @loc[0], @loc[1]
end
@ -125,6 +129,7 @@ class RubyVM::BareInstructions
generate_attribute 'rb_num_t', 'retn', rets.size
generate_attribute 'rb_num_t', 'width', width
generate_attribute 'rb_snum_t', 'sp_inc', rets.size - pops.size
generate_attribute 'bool', 'handles_stack', false
end
def typesplit a

Просмотреть файл

@ -20,6 +20,16 @@
MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>];
% end
%
% # JIT: Declare stack_size to be used in some macro of _mjit_compile_insn_body.erb
if (status->local_stack_p) {
% # TODO: can we use some functions to calculate this?
% if insn.name == 'send' || insn.name == 'invokesuper'
fprintf(f, " MAYBE_UNUSED(unsigned int) stack_size = %u;\n", b->stack_size - <%= insn.pops.size %> - ((ci->flag & VM_CALL_ARGS_BLOCKARG) ? 1 : 0));
% else
fprintf(f, " MAYBE_UNUSED(unsigned int) stack_size = %u;\n", b->stack_size - <%= insn.pops.size %>);
% end
}
%
% # JIT: Declare variables for operands, popped values and return values
% insn.declarations.each do |decl|
fprintf(f, " <%= decl %>;\n");

Просмотреть файл

@ -69,8 +69,28 @@
% end
% when /\A\s+DISPATCH_ORIGINAL_INSN\([^)]+\);\s+\z/
% # For `opt_xxx`'s fallbacks.
fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1);
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " goto cancel;\n");
% # If local_stack_p is TRUE, stack values are only available in local variables
% # for stack. So we need to replace those macros if local_stack_p is TRUE here.
% when /\bGET_SP\(\)/
% # reg_cfp->sp
fprintf(f, <%= to_cstr.call(line.sub(/\bGET_SP\(\)/, '%s')) %>, (status->local_stack_p ? "(stack + stack_size)" : "GET_SP()"));
% when /\bSTACK_ADDR_FROM_TOP\((?<num>[^)]+)\)/
% # #define STACK_ADDR_FROM_TOP(n) (GET_SP()-(n))
% num = Regexp.last_match[:num]
fprintf(f, <%= to_cstr.call(line.sub(/\bSTACK_ADDR_FROM_TOP\(([^)]+)\)/, '%s')) %>,
(status->local_stack_p ? "stack + (stack_size - (<%= num %>))" : "STACK_ADDR_FROM_TOP(<%= num %>)"));
% when /\bTOPN\((?<num>[^)]+)\)/
% # #define TOPN(n) (*(GET_SP()-(n)-1))
% num = Regexp.last_match[:num]
fprintf(f, <%= to_cstr.call(line.sub(/\bTOPN\(([^)]+)\)/, '%s')) %>,
(status->local_stack_p ? "*(stack + (stack_size - (<%= num %>) - 1))" : "TOPN(<%= num %>)"));
% when /\bPOPN\([^)]+\)/ # skip sp motion on JIT
fprintf(f, <%= to_cstr.call(line.sub(/\bPOPN\((?<num>[^)]+)\)/) { Regexp.last_match[:num] }) %>);
% when /\bINC_SP\([^)]+\)/ # skip sp motion on JIT
fprintf(f, <%= to_cstr.call(line.sub(/\bINC_SP\((?<num>[^)]+)\)/) { Regexp.last_match[:num] }) %>);
% else
fprintf(f, <%= to_cstr.call(line) %>);
% end

Просмотреть файл

@ -12,4 +12,19 @@
}
%
% # JIT: move sp to use or preserve stack variables
if (status->local_stack_p) {
% # sp motion is optimized away for `handles_stack? #=> false` case.
% # Thus sp should be set properly before `goto cancel`.
% if insn.handles_stack?
% # JIT-only behavior (pushing JIT's local variables to VM's stack):
rb_snum_t i, push_size;
push_size = -<%= insn.call_attribute('sp_inc') %> + <%= insn.rets.size %> - <%= insn.pops.size %>;
fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1 - <%= insn.pops.size %>); /* POPN(INSN_ATTR(popn)); */
for (i = 0; i < push_size; i++) { /* TODO: use memcpy? */
fprintf(f, " *(reg_cfp->sp + %ld) = stack[%ld];\n", i - push_size, (rb_snum_t)b->stack_size - push_size + i);
}
% end
}
else {
fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1 - <%= insn.pops.size %>); /* POPN(INSN_ATTR(popn)); */
}

Просмотреть файл

@ -24,6 +24,11 @@
int param_size = iseq->body->param.size; /* TODO: check calling->argc for argument_arity_error */
fprintf(f, "{\n");
% # JIT: Declare stack_size to be used in some macro of _mjit_compile_insn_body.erb
if (status->local_stack_p) {
/* TODO: can we use some functions to calculate this? */
fprintf(f, " MAYBE_UNUSED(unsigned int) stack_size = %u;\n", b->stack_size - <%= insn.pops.size %> - ((ci->flag & VM_CALL_ARGS_BLOCKARG) ? 1 : 0));
}
% # JIT: Invalidate call cache if it requires vm_search_method. This allows inlining some of the following things.
<%= render 'mjit_compile_send_guard' -%>

Просмотреть файл

@ -31,13 +31,13 @@
% # reg_cfp: the second argument of _mjitXXX
% # GET_CFP(): refers to `reg_cfp`
% # GET_EP(): refers to `reg_cfp->ep`
% # GET_SP(): refers to `reg_cfp->sp`
% # GET_SP(): refers to `reg_cfp->sp`, or `(stack + stack_size)` if local_stack_p
% # GET_SELF(): refers to `reg_cfp->self`
% # GET_LEP(): refers to `VM_EP_LEP(reg_cfp->ep)`
% # EXEC_EC_CFP(): refers to `val = vm_exec(ec, TRUE)` with frame setup
% # CALL_METHOD(): using `GET_CFP()` and `EXEC_EC_CFP()`
% # TOPN(): refers to `reg_cfp->sp`
% # STACK_ADDR_FROM_TOP(): refers to `reg_cfp->sp`
% # TOPN(): refers to `reg_cfp->sp`, or `*(stack + (stack_size - num - 1))` if local_stack_p
% # STACK_ADDR_FROM_TOP(): refers to `reg_cfp->sp`, or `stack + (stack_size - num)` if local_stack_p
% # DISPATCH_ORIGINAL_INSN(): expanded in _mjit_compile_insn.erb
% # THROW_EXCEPTION(): specially defined for JIT
% # RESTORE_REGS(): specially defined for `leave`

Просмотреть файл

@ -1236,12 +1236,12 @@ vm_throw(const rb_execution_context_t *ec, rb_control_frame_t *reg_cfp,
}
}
static inline void
vm_expandarray(rb_control_frame_t *cfp, VALUE ary, rb_num_t num, int flag)
static inline rb_num_t
vm_expandarray(VALUE *sp, VALUE ary, rb_num_t num, int flag)
{
int is_splat = flag & 0x01;
rb_num_t space_size = num + is_splat;
VALUE *base = cfp->sp;
VALUE *base = sp;
const VALUE *ptr;
rb_num_t len;
const VALUE obj = ary;
@ -1256,8 +1256,6 @@ vm_expandarray(rb_control_frame_t *cfp, VALUE ary, rb_num_t num, int flag)
len = (rb_num_t)RARRAY_LEN(ary);
}
cfp->sp += space_size;
if (flag & 0x02) {
/* post: ..., nil ,ary[-1], ..., ary[0..-num] # top */
rb_num_t i = 0, j;
@ -1299,6 +1297,7 @@ vm_expandarray(rb_control_frame_t *cfp, VALUE ary, rb_num_t num, int flag)
}
}
RB_GC_GUARD(ary);
return space_size;
}
static VALUE vm_call_general(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_call_info *ci, struct rb_call_cache *cc);