Optimize exivar access on JIT-ed getivar

JIT support of dd723771c1.

$ benchmark-driver -v --rbenv 'before;before --jit;after --jit' benchmark/mjit_exivar.yml --repeat-count=4
before: ruby 2.8.0dev (2020-03-30T12:32:26Z master e5db3da9d3) [x86_64-linux]
before --jit: ruby 2.8.0dev (2020-03-30T12:32:26Z master e5db3da9d3) +JIT [x86_64-linux]
after --jit: ruby 2.8.0dev (2020-03-31T05:57:24Z mjit-exivar 128625baec) +JIT [x86_64-linux]
Calculating -------------------------------------
                         before  before --jit  after --jit
         mjit_exivar    57.944M       53.579M      54.471M i/s -    200.000M times in 3.451588s 3.732772s 3.671687s

Comparison:
                      mjit_exivar
              before:  57944345.1 i/s
         after --jit:  54470876.7 i/s - 1.06x  slower
        before --jit:  53579483.4 i/s - 1.08x  slower
This commit is contained in:
Takashi Kokubun 2020-03-30 22:27:01 -07:00
Родитель e5db3da9d3
Коммит b736ea63bd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 6FFC433B12EE23DD
7 изменённых файлов: 107 добавлений и 12 удалений

32
benchmark/mjit_exivar.yml Normal file
Просмотреть файл

@ -0,0 +1,32 @@
# Benchmark: read an instance variable (@exivar) from an instance of a Hash subclass.
# When MJIT is enabled, the prelude calls the reader 10000 times, then pauses/resumes MJIT
# around one more call so that both compilations noted below happen before measurement starts.
prelude: |
# frozen_string_literal: true
class Bench < Hash
def initialize
@exivar = nil
end
def exivar
@exivar
end
end
bench = Bench.new
if defined?(RubyVM::MJIT) && RubyVM::MJIT.enabled?
jit_min_calls = 10000
i = 0
while i < jit_min_calls
bench.exivar
i += 1
end
RubyVM::MJIT.pause # compile (1)
# issue recompile
bench.exivar
RubyVM::MJIT.resume
RubyVM::MJIT.pause # compile (2)
end
benchmark:
mjit_exivar: bench.exivar
loop_count: 200000000

Просмотреть файл

@ -328,6 +328,7 @@ RB_DEBUG_COUNTER(mjit_frame_JT2VM)
/* MJIT cancel counters */
RB_DEBUG_COUNTER(mjit_cancel)
RB_DEBUG_COUNTER(mjit_cancel_ivar_inline)
RB_DEBUG_COUNTER(mjit_cancel_exivar_inline)
RB_DEBUG_COUNTER(mjit_cancel_send_inline)
RB_DEBUG_COUNTER(mjit_cancel_opt_insn) /* CALL_SIMPLE_METHOD */
RB_DEBUG_COUNTER(mjit_cancel_invalidate_all)

4
mjit.h
Просмотреть файл

@ -62,8 +62,10 @@ struct mjit_options {
// State of optimization switches
struct rb_mjit_compile_info {
// Disable getinstancevariable/setinstancevariable optimizations based on inline cache
// Disable getinstancevariable/setinstancevariable optimizations based on inline cache (T_OBJECT)
bool disable_ivar_cache;
// Disable getinstancevariable/setinstancevariable optimizations based on inline cache (FL_EXIVAR)
bool disable_exivar_cache;
// Disable send/opt_send_without_block optimizations based on inline cache
bool disable_send_cache;
// Disable method inlining

Просмотреть файл

@ -279,6 +279,12 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct
fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, "\nexivar_cancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_exivar_inline);\n");
fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_exivar_cache = true;\n");
fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n");
fprintf(f, " goto cancel;\n");
fprintf(f, "\ncancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n");
if (status->local_stack_p) {

Просмотреть файл

@ -3,6 +3,7 @@ require 'rbconfig'
module JITSupport
JIT_TIMEOUT = 600 # 10min for each...
JIT_SUCCESS_PREFIX = 'JIT success \(\d+\.\dms\)'
JIT_RECOMPILE_PREFIX = 'JIT recompile'
JIT_COMPACTION_PREFIX = 'JIT compaction \(\d+\.\dms\)'
UNSUPPORTED_COMPILERS = [
%r[\A.*/bin/intel64/icc\b],

Просмотреть файл

@ -778,6 +778,25 @@ class TestJIT < Test::Unit::TestCase
end;
end
# Checks JIT compilation of an instance variable read on an instance of a Hash subclass (Foo < Hash).
# With min_calls: 2 the script is expected to print "aaa", log 3 JIT successes and 1 JIT recompile.
def test_inlined_exivar
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "aaa", success_count: 3, recompile_count: 1, min_calls: 2)
begin;
class Foo < Hash
def initialize
@a = :a
end
def bar
@a
end
end
print(Foo.new.bar)
print(Foo.new.bar) # compile #initialize, #bar -> recompile #bar
print(Foo.new.bar) # compile #bar with exivar
end;
end
def test_inlined_undefined_ivar
assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 3, min_calls: 3)
begin;
@ -1065,12 +1084,13 @@ class TestJIT < Test::Unit::TestCase
end
# Shorthand for normal test cases
def assert_eval_with_jit(script, stdout: nil, success_count:, min_calls: 1, max_cache: 1000, insns: [], uplevel: 1, ignorable_patterns: [])
def assert_eval_with_jit(script, stdout: nil, success_count:, recompile_count: nil, min_calls: 1, max_cache: 1000, insns: [], uplevel: 1, ignorable_patterns: [])
out, err = eval_with_jit(script, verbose: 1, min_calls: min_calls, max_cache: max_cache)
actual = err.scan(/^#{JIT_SUCCESS_PREFIX}:/).size
success_actual = err.scan(/^#{JIT_SUCCESS_PREFIX}:/).size
recompile_actual = err.scan(/^#{JIT_RECOMPILE_PREFIX}:/).size
# Add --jit-verbose=2 logs for cl.exe because compiler's error message is suppressed
# for cl.exe with --jit-verbose=1. See `start_process` in mjit_worker.c.
if RUBY_PLATFORM.match?(/mswin/) && success_count != actual
if RUBY_PLATFORM.match?(/mswin/) && success_count != success_actual
out2, err2 = eval_with_jit(script, verbose: 2, min_calls: min_calls, max_cache: max_cache)
end
@ -1080,13 +1100,19 @@ class TestJIT < Test::Unit::TestCase
mark_tested_insn(insn, used_insns: used_insns, uplevel: uplevel + 3)
end
suffix = "script:\n#{code_block(script)}\nstderr:\n#{code_block(err)}#{(
"\nstdout(verbose=2 retry):\n#{code_block(out2)}\nstderr(verbose=2 retry):\n#{code_block(err2)}" if out2 || err2
)}"
assert_equal(
success_count, actual,
"Expected #{success_count} times of JIT success, but succeeded #{actual} times.\n\n"\
"script:\n#{code_block(script)}\nstderr:\n#{code_block(err)}#{(
"\nstdout(verbose=2 retry):\n#{code_block(out2)}\nstderr(verbose=2 retry):\n#{code_block(err2)}" if out2 || err2
)}",
success_count, success_actual,
"Expected #{success_count} times of JIT success, but succeeded #{success_actual} times.\n\n#{suffix}",
)
# Only verify the number of "JIT recompile" log lines when the caller passed recompile_count.
# The failure message reports the recompile numbers (expected vs. actual), matching the values
# being compared, rather than the JIT success numbers.
if recompile_count
  assert_equal(
    recompile_count, recompile_actual,
    "Expected #{recompile_count} times of JIT recompile, but recompiled #{recompile_actual} times.\n\n#{suffix}",
  )
end
if stdout
assert_equal(stdout, out, "Expected stdout #{out.inspect} to match #{stdout.inspect} with script:\n#{code_block(script)}")
end

Просмотреть файл

@ -16,17 +16,16 @@
% # compiler: Use copied IVC to avoid race condition
IVC ic_copy = &(status->is_entries + ((union iseq_inline_storage_entry *)ic - body->is_entries))->iv_cache;
%
% # compiler: Consider cfp->self as T_OBJECT if ic_copy->ic_serial is set
if (!status->compile_info->disable_ivar_cache && ic_copy->ic_serial) {
% # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`.
% # <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
%
% # JIT: prepare vm_getivar's arguments and variables
% # JIT: prepare vm_getivar/vm_setivar arguments and variables
fprintf(f, "{\n");
fprintf(f, " VALUE obj = GET_SELF();\n");
fprintf(f, " const rb_serial_t ic_serial = (rb_serial_t)%"PRI_SERIALT_PREFIX"u;\n", ic_copy->ic_serial);
fprintf(f, " const st_index_t index = %"PRIuSIZE";\n", ic_copy->index);
% # JIT: cache hit path of vm_getivar, or cancel JIT.
% # JIT: cache hit path of vm_getivar/vm_setivar, or cancel JIT (recompile it with exivar)
% if insn.name == 'setinstancevariable'
fprintf(f, " VALUE val = stack[%d];\n", b->stack_size - 1);
fprintf(f, " if (LIKELY(RB_TYPE_P(obj, T_OBJECT) && ic_serial == RCLASS_SERIAL(RBASIC(obj)->klass) && index < ROBJECT_NUMIV(obj) && !RB_OBJ_FROZEN(obj))) {\n");
@ -50,5 +49,33 @@
fprintf(f, "}\n");
break;
}
% if insn.name == 'getinstancevariable'
else if (!status->compile_info->disable_exivar_cache && ic_copy->ic_serial) {
% # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`.
% # <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
%
% # JIT: prepare vm_getivar's arguments and variables
fprintf(f, "{\n");
fprintf(f, " VALUE obj = GET_SELF();\n");
fprintf(f, " const rb_serial_t ic_serial = (rb_serial_t)%"PRI_SERIALT_PREFIX"u;\n", ic_copy->ic_serial);
fprintf(f, " const st_index_t index = %"PRIuSIZE";\n", ic_copy->index);
% # JIT: cache hit path of vm_getivar, or cancel JIT (recompile it without any ivar optimization)
fprintf(f, " struct gen_ivtbl *ivtbl;\n");
fprintf(f, " VALUE val;\n");
fprintf(f, " if (LIKELY(FL_TEST_RAW(obj, FL_EXIVAR) && ic_serial == RCLASS_SERIAL(RBASIC(obj)->klass) && st_lookup(rb_ivar_generic_ivtbl(), (st_data_t)obj, (st_data_t *)&ivtbl) && index < ivtbl->numiv && (val = ivtbl->ivptr[index]) != Qundef)) {\n");
fprintf(f, " stack[%d] = val;\n", b->stack_size);
fprintf(f, " }\n");
fprintf(f, " else {\n");
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
fprintf(f, " goto exivar_cancel;\n");
fprintf(f, " }\n");
% # compiler: Move JIT compiler's internal stack pointer
b->stack_size += <%= insn.call_attribute('sp_inc') %>;
fprintf(f, "}\n");
break;
}
% end
}
#endif // OPT_IC_FOR_IVAR