From 9b6b4674d77da2ef3f9f15095af9e39cc966b882 Mon Sep 17 00:00:00 2001 From: k0kubun Date: Sun, 14 Apr 2019 04:52:02 +0000 Subject: [PATCH] Recompile JIT-ed code without optimization based on inline cache when that optimization causes a JIT cancel. This feature was in the original MJIT implementation by Vladimir, but it was removed for simplicity when MJIT was merged into Ruby. This commit adds the functionality again for the following benchmark: https://github.com/benchmark-driver/misc/blob/52f05781f65467baf895bf6ba79d172c9b0826fd/concurrent-map/bench.rb (the float shown is the duration in seconds; shorter is better) * Before ``` $ INHERIT=0 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] -- 1.6507579649914987 $ INHERIT=0 ruby -v --jit bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] -- 1.5091587850474752 $ INHERIT=1 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] -- 1.6124781150138006 $ INHERIT=1 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] -- 1.7495657080435194 # <-- this ``` * After ``` $ INHERIT=0 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.653559010999743 $ INHERIT=0 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.4738391840364784 $ INHERIT=1 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.645227018976584 $ INHERIT=1 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.523708809982054 # <-- this ``` git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67530 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- mjit.c | 58 +++++++++++++++++++---- mjit.h | 14 +++++- mjit_compile.c | 4 ++ mjit_worker.c | 4 ++ test/ruby/test_jit.rb | 5 +- 
tool/ruby_vm/views/_mjit_compile_ivar.erb | 4 +- tool/ruby_vm/views/_mjit_compile_send.erb | 4 +- 7 files changed, 79 insertions(+), 14 deletions(-) diff --git a/mjit.c b/mjit.c index a1c8f238c2..d025ba1643 100644 --- a/mjit.c +++ b/mjit.c @@ -299,16 +299,16 @@ unload_units(void) verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length); } -/* Add ISEQ to be JITed in parallel with the current thread. - Unload some JIT codes if there are too many of them. */ -void -mjit_add_iseq_to_process(const rb_iseq_t *iseq) +static void +mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info) { if (!mjit_enabled || pch_status == PCH_FAILED) return; iseq->body->jit_func = (mjit_func_t)NOT_READY_JIT_ISEQ_FUNC; create_unit(iseq); if (iseq->body->jit_unit == NULL) /* Failure in creating the unit. */ return; + if (compile_info != NULL) /* safe: jit_unit was checked for NULL above */ + iseq->body->jit_unit->compile_info = *compile_info; @@ -323,13 +323,19 @@ mjit_add_iseq_to_process(const rb_iseq_t *iseq) CRITICAL_SECTION_FINISH(3, "in add_iseq_to_process"); } +/* Add ISEQ to be JITed in parallel with the current thread. + Unload some JIT codes if there are too many of them. */ +void +rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq) +{ + mjit_add_iseq_to_process(iseq, NULL); +} + /* For this timeout seconds, --jit-wait will wait for JIT compilation finish. */ #define MJIT_WAIT_TIMEOUT_SECONDS 60 -/* Wait for JIT compilation finish for --jit-wait, and call the function pointer - if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. 
*/ -VALUE -mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body) +static void +mjit_wait(struct rb_iseq_constant_body *body) { struct timeval tv; int tries = 0; @@ -350,13 +356,48 @@ mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body) CRITICAL_SECTION_FINISH(3, "in mjit_wait_call for a client wakeup"); rb_thread_wait_for(tv); } +} +/* Wait for JIT compilation finish for --jit-wait, and call the function pointer + if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. */ +VALUE +mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body) +{ + mjit_wait(body); if ((uintptr_t)body->jit_func <= (uintptr_t)LAST_JIT_ISEQ_FUNC) { return Qundef; } return body->jit_func(ec, ec->cfp); } +struct rb_mjit_compile_info* +rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body) +{ + assert(body->jit_unit != NULL); + return &body->jit_unit->compile_info; +} + +void +rb_mjit_recompile_iseq(const rb_iseq_t *iseq) +{ + if ((uintptr_t)iseq->body->jit_func <= (uintptr_t)LAST_JIT_ISEQ_FUNC) /* unsigned compare, same as mjit_wait_call: jit_func is a sentinel, not compiled code */ + return; + + verbose(1, "JIT recompile: %s@%s:%d", RSTRING_PTR(iseq->body->location.label), + RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno)); + + CRITICAL_SECTION_START(3, "in rb_mjit_recompile_iseq"); + remove_from_list(iseq->body->jit_unit, &active_units); + iseq->body->jit_func = (mjit_func_t)NOT_ADDED_JIT_ISEQ_FUNC; + add_to_list(iseq->body->jit_unit, &stale_units); + CRITICAL_SECTION_FINISH(3, "in rb_mjit_recompile_iseq"); + + mjit_add_iseq_to_process(iseq, &iseq->body->jit_unit->compile_info); + if (UNLIKELY(mjit_opts.wait)) { + mjit_wait(iseq->body); + } +} + extern VALUE ruby_archlibdir_path, ruby_prefix_path; // Initialize header_file, pch_file, libruby_pathflag. Return true on success. 
@@ -818,6 +859,7 @@ mjit_finish(bool close_handle_p) free_list(&unit_queue, close_handle_p); free_list(&active_units, close_handle_p); free_list(&compact_units, close_handle_p); + free_list(&stale_units, close_handle_p); finish_conts(); mjit_enabled = false; diff --git a/mjit.h b/mjit.h index 6fb15b402c..2e135287e2 100644 --- a/mjit.h +++ b/mjit.h @@ -55,14 +55,24 @@ struct mjit_options { int max_cache_size; }; +// State of optimization switches +struct rb_mjit_compile_info { + // Disable getinstancevariable/setinstancevariable optimizations based on inline cache + bool disable_ivar_cache; + // Disable send/opt_send_without_block optimizations based on inline cache + bool disable_send_cache; +}; + typedef VALUE (*mjit_func_t)(rb_execution_context_t *, rb_control_frame_t *); RUBY_SYMBOL_EXPORT_BEGIN RUBY_EXTERN struct mjit_options mjit_opts; RUBY_EXTERN bool mjit_call_p; -extern void mjit_add_iseq_to_process(const rb_iseq_t *iseq); +extern void rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq); extern VALUE mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body); +extern struct rb_mjit_compile_info* rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body); +extern void rb_mjit_recompile_iseq(const rb_iseq_t *iseq); RUBY_SYMBOL_EXPORT_END extern bool mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname); @@ -120,7 +130,7 @@ mjit_exec(rb_execution_context_t *ec) RB_DEBUG_COUNTER_INC(mjit_exec_not_added); if (total_calls == mjit_opts.min_calls && mjit_target_iseq_p(body)) { RB_DEBUG_COUNTER_INC(mjit_exec_not_added_add_iseq); - mjit_add_iseq_to_process(iseq); + rb_mjit_add_iseq_to_process(iseq); if (UNLIKELY(mjit_opts.wait)) { return mjit_wait_call(ec, body); } diff --git a/mjit_compile.c b/mjit_compile.c index 8a3b6b3437..9341970e01 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -37,6 +37,8 @@ struct compile_status { // Safely-accessible cache entries copied from main thread. 
union iseq_inline_storage_entry *is_entries; struct rb_call_cache *cc_entries; + // Mutated optimization levels + struct rb_mjit_compile_info *compile_info; }; /* Storage to keep data which is consistent in each conditional branch. @@ -213,6 +215,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname) alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL, .is_entries = (body->is_size > 0) ? alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL, + .compile_info = rb_mjit_iseq_compile_info(body), }; memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); if ((status.cc_entries != NULL || status.is_entries != NULL) @@ -235,6 +238,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname) else { fprintf(f, " VALUE *stack = reg_cfp->sp;\n"); } + fprintf(f, " static const rb_iseq_t *original_iseq = (const rb_iseq_t *)0x%"PRIxVALUE";\n", (VALUE)iseq); /* emit an explicit cast, like original_body_iseq below */ fprintf(f, " static const VALUE *const original_body_iseq = (VALUE *)0x%"PRIxVALUE";\n", (VALUE)body->iseq_encoded); diff --git a/mjit_worker.c b/mjit_worker.c index 205082f7a9..9936c255d9 100644 --- a/mjit_worker.c +++ b/mjit_worker.c @@ -144,6 +144,8 @@ struct rb_mjit_unit { /* Only used by unload_units. Flag to check this unit is currently on stack or not. */ char used_code_p; struct list_node unode; + // mjit_compile's optimization switches + struct rb_mjit_compile_info compile_info; }; /* Linked list of struct rb_mjit_unit. */ @@ -184,6 +186,8 @@ static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) } static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) }; /* List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`. */ static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) }; +// List of units before recompilation and just waiting for dlclose(). 
+static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) }; /* The number of so far processed ISEQs, used to generate unique id. */ static int current_unit_num; /* A mutex for conitionals and critical sections. */ diff --git a/test/ruby/test_jit.rb b/test/ruby/test_jit.rb index 0079741a04..50bfcefac7 100644 --- a/test/ruby/test_jit.rb +++ b/test/ruby/test_jit.rb @@ -10,6 +10,7 @@ class TestJIT < Test::Unit::TestCase include JITSupport IGNORABLE_PATTERNS = [ + /\AJIT recompile: .+\n\z/, /\ASuccessful MJIT finish\n\z/, ] @@ -529,7 +530,7 @@ class TestJIT < Test::Unit::TestCase end; # send call -> optimized call (send JIT) -> optimized call - assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 1, min_calls: 2) + assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 2, min_calls: 2) begin; obj = Object.new def obj.[](h) @@ -704,7 +705,7 @@ class TestJIT < Test::Unit::TestCase end def test_inlined_undefined_ivar - assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 2, min_calls: 3) + assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 3, min_calls: 3) begin; class Foo def initialize diff --git a/tool/ruby_vm/views/_mjit_compile_ivar.erb b/tool/ruby_vm/views/_mjit_compile_ivar.erb index 7275e86338..8fdf0b80fe 100644 --- a/tool/ruby_vm/views/_mjit_compile_ivar.erb +++ b/tool/ruby_vm/views/_mjit_compile_ivar.erb @@ -17,7 +17,7 @@ IC ic_copy = &(status->is_entries + ((union iseq_inline_storage_entry *)ic - body->is_entries))->cache; % % # compiler: Consider cfp->self as T_OBJECT if ic_copy->ic_serial is set - if (ic_copy->ic_serial) { + if (!status->compile_info->disable_ivar_cache && ic_copy->ic_serial) { % # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`. 
% # <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%> % @@ -43,6 +43,8 @@ fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_ivar);\n"); + fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_ivar_cache = true;\n"); + fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n"); fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb index 158fdb84bd..90b9a07a0e 100644 --- a/tool/ruby_vm/views/_mjit_compile_send.erb +++ b/tool/ruby_vm/views/_mjit_compile_send.erb @@ -16,7 +16,7 @@ % # compiler: Use copied cc to avoid race condition CALL_CACHE cc_copy = status->cc_entries + (cc - body->cc_entries); % - if (has_valid_method_type(cc_copy)) { + if (!status->compile_info->disable_send_cache && has_valid_method_type(cc_copy)) { const rb_iseq_t *iseq; unsigned int argc = ci->orig_argc; // this `argc` variable is for calculating a value's position on stack considering `blockarg`. % if insn.name == 'send' @@ -39,6 +39,8 @@ fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n"); + fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_send_cache = true;\n"); + fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n"); fprintf(f, " goto cancel;\n"); fprintf(f, " }\n");