This change fixes some cases where YJIT fails to fire tracing events.
Most of the situations YJIT did not handle correctly involve enabling
tracing while running inside generated code.

A new operation to invalidate all generated code is added, which uses
patching to make generated code exit at the next VM instruction
boundary. A new routine called `jit_prepare_routine_call()` is
introduced to facilitate this and should be used when generating code
that could allocate, or could otherwise use `RB_VM_LOCK_ENTER()`.

The `c_return` event is fired in the middle of an instruction as opposed
to at an instruction boundary, so it requires special handling. C method
call return points are patched to go to a function which does everything
the interpreter does, including firing the `c_return` event. The
generated code for C method calls normally does not fire the event.

Invalidated code should not change after patching so the exits are not
clobbered. A new variable is introduced to track the region of code that
should not change.
This commit is contained in:
Alan Wu 2021-08-25 17:00:45 -04:00
Родитель 0562459473
Коммит bd876c243a
11 изменённых файлов: 550 добавлений и 84 удалений

Просмотреть файл

@ -32,7 +32,6 @@ To cite this repository in your publications, please use this bibtex snippet:
YJIT is a work in progress and as such may not yet be mature enough for mission-critical software. Below is a list of known limitations, all of which we plan to eventually address:
- No support for the `TracePoint` API (see [#54](https://github.com/Shopify/yjit/issues/54)).
- No garbage collection for generated code.
Because there is no GC for generated code yet, your software could run out of executable memory if it is large enough. You can change how much executable memory is allocated using [YJIT's command-line options](https://github.com/Shopify/yjit#command-line-options).

Просмотреть файл

@ -1612,3 +1612,217 @@ end
bar(123, 1.1)
bar(123, 1.1)
}
# test enabling a line TracePoint in a C method call
assert_equal '[[:line, true]]', %q{
events = []
events.instance_variable_set(
:@tp,
TracePoint.new(:line) { |tp| events << [tp.event, tp.lineno] if tp.path == __FILE__ }
)
def events.to_str
@tp.enable; ''
end
# Stay in generated code while enabling tracing
def events.compiled(obj)
String(obj)
@tp.disable; __LINE__
end
line = events.compiled(events)
events[0][-1] = (events[0][-1] == line)
events
}
# test enabling a c_return TracePoint in a C method call
assert_equal '[[:c_return, :String, :string_alias, "events_to_str"]]', %q{
events = []
events.instance_variable_set(:@tp, TracePoint.new(:c_return) { |tp| events << [tp.event, tp.method_id, tp.callee_id, tp.return_value] })
def events.to_str
@tp.enable; 'events_to_str'
end
# Stay in generated code while enabling tracing
alias string_alias String
def events.compiled(obj)
string_alias(obj)
@tp.disable
end
events.compiled(events)
events
}
# test enabling a TracePoint that targets a particular line in a C method call
assert_equal '[true]', %q{
events = []
events.instance_variable_set(:@tp, TracePoint.new(:line) { |tp| events << tp.lineno })
def events.to_str
@tp.enable(target: method(:compiled))
''
end
# Stay in generated code while enabling tracing
def events.compiled(obj)
String(obj)
__LINE__
end
line = events.compiled(events)
events[0] = (events[0] == line)
events
}
# test enabling tracing in the middle of splatarray
assert_equal '[true]', %q{
events = []
obj = Object.new
obj.instance_variable_set(:@tp, TracePoint.new(:line) { |tp| events << tp.lineno })
def obj.to_a
@tp.enable(target: method(:compiled))
[]
end
# Enable tracing in the middle of the splatarray instruction
def obj.compiled(obj)
* = *obj
__LINE__
end
obj.compiled([])
line = obj.compiled(obj)
events[0] = (events[0] == line)
events
}
# test enabling tracing in the middle of opt_aref. Different since the codegen
# for it ends in a jump.
assert_equal '[true]', %q{
def lookup(hash, tp)
hash[42]
tp.disable; __LINE__
end
lines = []
tp = TracePoint.new(:line) { lines << _1.lineno if _1.path == __FILE__ }
lookup(:foo, tp)
lookup({}, tp)
enable_tracing_on_missing = Hash.new { tp.enable }
expected_line = lookup(enable_tracing_on_missing, tp)
lines[0] = true if lines[0] == expected_line
lines
}
# test enabling c_call tracing before compiling
assert_equal '[[:c_call, :itself]]', %q{
def shouldnt_compile
itself
end
events = []
tp = TracePoint.new(:c_call) { |tp| events << [tp.event, tp.method_id] }
# assume first call compiles
tp.enable { shouldnt_compile }
events
}
# test enabling c_return tracing before compiling
assert_equal '[[:c_return, :itself, main]]', %q{
def shouldnt_compile
itself
end
events = []
tp = TracePoint.new(:c_return) { |tp| events << [tp.event, tp.method_id, tp.return_value] }
# assume first call compiles
tp.enable { shouldnt_compile }
events
}
# test enabling tracing for a suspended fiber
assert_equal '[[:return, 42]]', %q{
def traced_method
Fiber.yield
42
end
events = []
tp = TracePoint.new(:return) { events << [_1.event, _1.return_value] }
# assume first call compiles
fiber = Fiber.new { traced_method }
fiber.resume
tp.enable(target: method(:traced_method))
fiber.resume
events
}
# test compiling on non-tracing ractor then running on a tracing one
assert_equal '[:itself]', %q{
def traced_method
itself
end
tracing_ractor = Ractor.new do
# 1: start tracing
events = []
tp = TracePoint.new(:c_call) { events << _1.method_id }
tp.enable
Ractor.yield(nil)
# 3: run compiled method on tracing ractor
Ractor.yield(nil)
traced_method
events
ensure
tp&.disable
end
tracing_ractor.take
# 2: compile on non tracing ractor
traced_method
tracing_ractor.take
tracing_ractor.take
}
# Try to hit a lazy branch stub while another ractor enables tracing
assert_equal '42', %q{
def compiled(arg)
if arg
arg + 1
else
itself
itself
end
end
ractor = Ractor.new do
compiled(false)
Ractor.yield(nil)
compiled(41)
end
tp = TracePoint.new(:line) { itself }
ractor.take
tp.enable
ractor.take
}

Просмотреть файл

@ -7024,7 +7024,6 @@ iseq.$(OBJEXT): {$(VPATH)}vm_callinfo.h
iseq.$(OBJEXT): {$(VPATH)}vm_core.h
iseq.$(OBJEXT): {$(VPATH)}vm_opts.h
iseq.$(OBJEXT): {$(VPATH)}yjit.h
iseq.$(OBJEXT): {$(VPATH)}yjit_asm.h
load.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h
load.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h
load.$(OBJEXT): $(CCAN_DIR)/list/list.h
@ -16722,6 +16721,7 @@ yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/gc.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/imemo.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/object.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/re.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/serial.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
yjit_codegen.$(OBJEXT): $(top_srcdir)/internal/string.h
@ -16746,6 +16746,7 @@ yjit_codegen.$(OBJEXT): {$(VPATH)}darray.h
yjit_codegen.$(OBJEXT): {$(VPATH)}debug_counter.h
yjit_codegen.$(OBJEXT): {$(VPATH)}defines.h
yjit_codegen.$(OBJEXT): {$(VPATH)}encoding.h
yjit_codegen.$(OBJEXT): {$(VPATH)}gc.h
yjit_codegen.$(OBJEXT): {$(VPATH)}id.h
yjit_codegen.$(OBJEXT): {$(VPATH)}id_table.h
yjit_codegen.$(OBJEXT): {$(VPATH)}insns.def
@ -16898,6 +16899,9 @@ yjit_codegen.$(OBJEXT): {$(VPATH)}missing.h
yjit_codegen.$(OBJEXT): {$(VPATH)}node.h
yjit_codegen.$(OBJEXT): {$(VPATH)}onigmo.h
yjit_codegen.$(OBJEXT): {$(VPATH)}oniguruma.h
yjit_codegen.$(OBJEXT): {$(VPATH)}probes.dmyh
yjit_codegen.$(OBJEXT): {$(VPATH)}probes.h
yjit_codegen.$(OBJEXT): {$(VPATH)}probes_helper.h
yjit_codegen.$(OBJEXT): {$(VPATH)}ruby_assert.h
yjit_codegen.$(OBJEXT): {$(VPATH)}ruby_atomic.h
yjit_codegen.$(OBJEXT): {$(VPATH)}st.h

16
iseq.c
Просмотреть файл

@ -3181,14 +3181,6 @@ typedef struct insn_data_struct {
} insn_data_t;
static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2];
#include "yjit_asm.h"
void
rb_vm_encoded_insn_data_table_init(void)
{
@ -3305,10 +3297,6 @@ iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events,
VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
#if USE_MJIT
// Force write the jit function to NULL
*((jit_func_t *)(&body->jit_func)) = 0;
#endif
for (pc=0; pc<body->iseq_size;) {
const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
@ -3445,10 +3433,6 @@ rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
}
#if USE_MJIT
// Force write the jit function to NULL
*((jit_func_t *)(&body->jit_func)) = 0;
#endif
}
}

Просмотреть файл

@ -30,6 +30,7 @@
#include "ruby/debug.h"
#include "vm_core.h"
#include "ruby/ractor.h"
#include "yjit.h"
#include "builtin.h"
@ -97,6 +98,8 @@ update_global_event_hook(rb_event_flag_t vm_events)
rb_clear_attr_ccs();
}
yjit_tracing_invalidate_all();
ruby_vm_event_flags = vm_events;
ruby_vm_event_enabled_global_flags |= vm_events;
rb_objspace_set_event_hook(vm_events);
@ -1212,6 +1215,8 @@ rb_tracepoint_enable_for_target(VALUE tpval, VALUE target, VALUE target_line)
rb_raise(rb_eArgError, "can not enable any hooks");
}
yjit_tracing_invalidate_all();
ruby_vm_event_local_num++;
tp->tracing = 1;

1
yjit.h
Просмотреть файл

@ -73,5 +73,6 @@ void rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body);
void rb_yjit_iseq_free(const struct rb_iseq_constant_body *body);
void rb_yjit_before_ractor_spawn(void);
void yjit_constant_ic_update(const rb_iseq_t *iseq, IC ic);
void yjit_tracing_invalidate_all(void);
#endif // #ifndef YJIT_H

Просмотреть файл

@ -1,17 +1,20 @@
#include <assert.h>
#include "insns.inc"
#include "internal.h"
#include "insns.inc"
#include "vm_core.h"
#include "vm_sync.h"
#include "vm_callinfo.h"
#include "builtin.h"
#include "gc.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/object.h"
#include "internal/sanitizers.h"
#include "internal/string.h"
#include "internal/variable.h"
#include "internal/re.h"
#include "insns_info.inc"
#include "probes.h"
#include "probes_helper.h"
#include "yjit.h"
#include "yjit_iface.h"
#include "yjit_core.h"
@ -36,6 +39,25 @@ codeblock_t* ocb = NULL;
// Code for exiting back to the interpreter from the leave insn
static void *leave_exit_code;
// Code for full logic of returning from C method and exiting to the interpreter
static uint32_t outline_full_cfunc_return_pos;
// For implementing global code invalidation.
// Describes one patch point: a position in the mainline code block that is
// overwritten with a jump to a position in the outlined code block when
// yjit_tracing_invalidate_all() applies the patches.
struct codepage_patch {
// Write position in `cb` where the jump gets written
uint32_t mainline_patch_pos;
// Position in `ocb` that the jump lands on
uint32_t outline_target_pos;
};
// Growable array of patch points
typedef rb_darray(struct codepage_patch) patch_array_t;
// Patch points appended by record_global_inval_patch(). Starts out empty (NULL).
static patch_array_t global_inval_patches = NULL;
// This number keeps track of the number of bytes counting from the beginning
// of the page that should not be changed. After patching for global
// invalidation, no one should make changes to the invalidated code region
// anymore.
uint32_t yjit_codepage_frozen_bytes = 0;
// Print the current source location for debugging purposes
RBIMPL_ATTR_MAYBE_UNUSED()
static void
@ -156,6 +178,28 @@ jit_save_sp(jitstate_t* jit, ctx_t* ctx)
}
}
// jit_save_pc() + jit_save_sp(), plus a request to record a global
// invalidation patch point at the end of the current instruction.
// Should be used before calling a routine that could:
// - Perform GC allocation
// - Take the VM lock through RB_VM_LOCK_ENTER()
// - Perform Ruby method call
// scratch_reg is handed to jit_save_pc() unchanged.
static void
jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
{
// Picked up by yjit_gen_block() / jit_jump_to_next_insn(), which generate an
// exit for the next instruction boundary and record the patch point.
jit->record_boundary_patch_point = true;
jit_save_pc(jit, scratch_reg);
jit_save_sp(jit, ctx);
}
// Remember the code block's current write position as a spot that global code
// invalidation may later overwrite with a jump to `outline_block_target_pos`
// in the outlined block. Aborts via rb_bug() if the patch list cannot grow.
static void
record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
{
    struct codepage_patch patch_point = {
        cb->write_pos,            // where to patch in the mainline block
        outline_block_target_pos  // where the patched jump should go
    };

    if (rb_darray_append(&global_inval_patches, patch_point)) {
        return;
    }

    rb_bug("allocation failed");
}
static bool jit_guard_known_klass(jitstate_t *jit, ctx_t* ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
#if RUBY_DEBUG
@ -290,15 +334,13 @@ _counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
// Generate an exit to return to the interpreter
static uint8_t *
yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
static uint32_t
yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
{
uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
const uint32_t code_pos = cb->write_pos;
ADD_COMMENT(cb, "exit to interpreter");
VALUE *exit_pc = jit->pc;
// Generate the code to exit to the interpreters
// Write the adjusted SP back into the CFP
if (ctx->sp_offset != 0) {
@ -329,7 +371,7 @@ yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
mov(cb, RAX, imm_opnd(Qundef));
ret(cb);
return code_ptr;
return code_pos;
}
// Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
@ -363,7 +405,8 @@ yjit_gen_leave_exit(codeblock_t *cb)
static uint8_t *
yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
{
return yjit_gen_exit(jit, ctx, ocb);
uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
return cb_get_ptr(ocb, pos);
}
// Generate a runtime guard that ensures the PC is at the start of the iseq,
@ -399,6 +442,64 @@ yjit_pc_guard(const rb_iseq_t *iseq)
cb_link_labels(cb);
}
// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
// like the interpreter. When tracing for c_return is enabled, we patch the code after
// the C method return to call into this to fire the event.
//
// ec:           execution context; its current frame must be the C method's frame
// return_value: what the C method returned. It is reported to the hook and
//               then pushed onto the caller's stack.
static void
full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
{
// Grab the C method's frame and method entry before the frame is popped;
// both are still read after rb_vm_pop_frame() below.
rb_control_frame_t *cfp = ec->cfp;
RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
// CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
// Pop the C func's frame and fire the c_return TracePoint event
// Note that this is the same order as vm_call_cfunc_with_frame().
rb_vm_pop_frame(ec);
EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
// Note, this deviates from the interpreter in that users need to enable
// a c_return TracePoint for this DTrace hook to work. A reasonable change
// since the Ruby return event works this way as well.
RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
// Push return value into the caller's stack. We know that it's a frame that
// uses cfp->sp because we are patching a call done with gen_send_cfunc().
// ec->cfp is the caller's frame at this point since the C frame was popped.
ec->cfp->sp[0] = return_value;
ec->cfp->sp++;
}
// Landing code for when c_return tracing is enabled. See full_cfunc_return().
// The code is written into the outlined block and its start position is saved
// in outline_full_cfunc_return_pos, which gen_send_cfunc() records as the
// patch target for the instruction following each C method call.
static void
gen_full_cfunc_return(void)
{
codeblock_t *cb = ocb;
outline_full_cfunc_return_pos = ocb->write_pos;
// This chunk of code expects REG_EC to be filled properly and
// RAX to contain the return value of the C method.
// Call full_cfunc_return()
mov(cb, C_ARG_REGS[0], REG_EC);
mov(cb, C_ARG_REGS[1], RAX);
call_ptr(cb, REG0, (void *)full_cfunc_return);
// Count the exit
GEN_COUNTER_INC(cb, traced_cfunc_return);
// Return to the interpreter with Qundef in RAX.
// NOTE(review): the pops presumably undo pushes done when entering
// generated code -- confirm against the entry prologue.
pop(cb, REG_SP);
pop(cb, REG_EC);
pop(cb, REG_CFP);
mov(cb, RAX, imm_opnd(Qundef));
ret(cb);
}
/*
Compile an interpreter entry block to be inserted into an iseq
Returns `NULL` if compilation fails.
@ -473,6 +574,13 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
// We are at the end of the current instruction. Record the boundary.
if (jit->record_boundary_patch_point) {
uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, ocb);
record_global_inval_patch(cb, exit_pos);
jit->record_boundary_patch_point = false;
}
// Generate the jump instruction
gen_direct_jump(
jit->block,
@ -536,6 +644,14 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
jit.pc = pc;
jit.opcode = opcode;
// If previous instruction requested to record the boundary
if (jit.record_boundary_patch_point) {
// Generate an exit to this instruction and record it
uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
record_global_inval_patch(cb, exit_pos);
jit.record_boundary_patch_point = false;
}
// Verify our existing assumption (DEBUG)
if (jit_at_current_insn(&jit)) {
verify_ctx(&jit, ctx);
@ -546,7 +662,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
if (!gen_fn) {
// If we reach an unknown instruction,
// exit to the interpreter and stop compiling
yjit_gen_exit(&jit, ctx, cb);
yjit_gen_exit(jit.pc, ctx, cb);
break;
}
@ -576,7 +692,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
// TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
// the exit this generates would be wrong. We could save a copy of the entry context
// and assert that ctx is the same here.
yjit_gen_exit(&jit, ctx, cb);
yjit_gen_exit(jit.pc, ctx, cb);
break;
}
@ -596,6 +712,10 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
// Store the index of the last instruction in the block
block->end_idx = insn_idx;
// We currently can't handle cases where the request is for a block that
// doesn't go to the next instruction.
RUBY_ASSERT(!jit.record_boundary_patch_point);
if (YJIT_DUMP_MODE >= 2) {
// Dump list of compiled instructions
fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
@ -735,8 +855,7 @@ gen_newarray(jitstate_t* jit, ctx_t* ctx)
rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@ -760,8 +879,7 @@ gen_duparray(jitstate_t* jit, ctx_t* ctx)
VALUE ary = jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
// call rb_ary_resurrect(VALUE ary);
jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
@ -783,8 +901,7 @@ gen_splatarray(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
@ -908,8 +1025,7 @@ gen_newhash(jitstate_t* jit, ctx_t* ctx)
if (n == 0) {
// Save the PC and SP because we are allocating
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
// val = rb_hash_new();
call_ptr(cb, REG0, (void *)rb_hash_new);
@ -1559,8 +1675,7 @@ gen_setinstancevariable(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
@ -1611,8 +1726,7 @@ gen_defined(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
@ -1706,8 +1820,7 @@ gen_concatstrings(jitstate_t* jit, ctx_t* ctx)
rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@ -1975,15 +2088,13 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx)
// Call VALUE rb_hash_aref(VALUE hash, VALUE key).
{
// Write incremented pc to cfp->pc as the routine can raise and allocate
jit_save_pc(jit, REG0);
// About to change REG_SP which these operands depend on. Yikes.
mov(cb, C_ARG_REGS[0], recv_opnd);
mov(cb, C_ARG_REGS[1], idx_opnd);
// Write incremented pc to cfp->pc as the routine can raise and allocate
// Write sp to cfp->sp since rb_hash_aref might need to call #hash on the key
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
call_ptr(cb, REG0, (void *)rb_hash_aref);
@ -2009,8 +2120,7 @@ gen_opt_aset(jitstate_t *jit, ctx_t *ctx)
{
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
uint8_t* side_exit = yjit_side_exit(jit, ctx);
@ -2177,8 +2287,7 @@ gen_opt_mod(jitstate_t* jit, ctx_t* ctx)
{
// Save the PC and SP because the callee may allocate bignums
// Note that this modifies REG_SP, which is why we do it first
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
uint8_t* side_exit = yjit_side_exit(jit, ctx);
@ -2691,6 +2800,25 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
return YJIT_CANT_COMPILE;
}
// Don't JIT if tracing c_call or c_return
{
rb_event_flag_t tracing_events;
if (rb_multi_ractor_p()) {
tracing_events = ruby_vm_event_enabled_global_flags;
}
else {
// We could always use ruby_vm_event_enabled_global_flags,
// but since events are never removed from it, doing so would mean
// we don't compile even after tracing is disabled.
tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
}
if (tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN)) {
GEN_COUNTER_INC(cb, send_cfunc_tracing);
return YJIT_CANT_COMPILE;
}
}
// Delegate to codegen for C methods if we have it.
{
method_codegen_t known_cfunc_codegen;
@ -2842,6 +2970,9 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
// Invalidation logic is in rb_yjit_method_lookup_change()
call_ptr(cb, REG0, (void*)cfunc->func);
// Record code position for TracePoint patching. See full_cfunc_return().
record_global_inval_patch(cb, outline_full_cfunc_return_pos);
// Push the return value on the Ruby stack
x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
mov(cb, stack_ret, RAX);
@ -2856,7 +2987,7 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
// cfunc calls may corrupt types
ctx_clear_local_types(ctx);
// Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
// Note: gen_send_iseq() jumps to the next instruction with ctx->sp_offset == 0
// after the call, while this does not. This difference prevents
// the two call types from sharing the same successor.
@ -3480,8 +3611,7 @@ gen_getglobal(jitstate_t* jit, ctx_t* ctx)
ID gid = jit_get_arg(jit, 0);
// Save the PC and SP because we might make a Ruby call for warning
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@ -3500,8 +3630,7 @@ gen_setglobal(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because we might make a Ruby call for
// Kernel#set_trace_var
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@ -3519,8 +3648,7 @@ gen_tostring(jitstate_t* jit, ctx_t* ctx)
{
// Save the PC and SP because we might make a Ruby call for
// Kernel#set_trace_var
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t str = ctx_stack_pop(ctx, 1);
x86opnd_t val = ctx_stack_pop(ctx, 1);
@ -3545,8 +3673,7 @@ gen_toregexp(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because this allocates an object and could
// raise an exception.
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
ctx_stack_pop(ctx, cnt);
@ -3678,8 +3805,7 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
}
// If the calls don't allocate, do they need up to date PC, SP?
jit_save_pc(jit, REG0);
jit_save_sp(jit, ctx);
jit_prepare_routine_call(jit, ctx, REG0);
if (bf->argc > 0) {
// Load environment pointer EP from CFP
@ -3706,6 +3832,107 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
return YJIT_KEEP_COMPILING;
}
static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
// Invalidate all generated code and patch C method return code to contain
// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
// means they are inside a C routine. If there are any generated code on-stack,
// they are waiting for a return from a C routine. For every routine call, we
// patch in an exit after the body of the containing VM instruction. This makes
// it so all the invalidated code exit as soon as execution logically reaches
// the next VM instruction.
// The c_return event needs special handling as our codegen never outputs code
// that contains tracing logic. If we let the normal output code run until the
// start of the next VM instruction by relying on the patching scheme above, we
// would fail to fire the c_return event. To handle it, we patch in the full
// logic at the return address. See full_cfunc_return().
// In addition to patching, we prevent future entries into invalidated code by
// removing all live blocks from their iseq.
void
yjit_tracing_invalidate_all(void)
{
if (!rb_yjit_enabled_p()) return;
// Stop other ractors since we are going to patch machine code.
RB_VM_LOCK_ENTER();
rb_vm_barrier();
// Make it so all live block versions are no longer valid branch targets
rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
// Apply patches
const uint32_t old_pos = cb->write_pos;
rb_darray_for(global_inval_patches, patch_idx) {
struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
cb_set_pos(cb, patch.mainline_patch_pos);
uint8_t *jump_target = cb_get_ptr(ocb, patch.outline_target_pos);
jmp_ptr(cb, jump_target);
}
cb_set_pos(cb, old_pos);
// Freeze invalidated part of the codepage. We only want to wait for
// running instances of the code to exit from now on, so we shouldn't
// change the code. There could be other ractors sleeping in
// branch_stub_hit(), for example. We could harden this by changing memory
// protection on the frozen range.
RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
yjit_codepage_frozen_bytes = old_pos;
RB_VM_LOCK_LEAVE();
}
static int
tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
{
VALUE v = (VALUE)vstart;
for (; v != (VALUE)vend; v += stride) {
void *ptr = asan_poisoned_object_p(v);
asan_unpoison_object(v, false);
if (rb_obj_is_iseq(v)) {
rb_iseq_t *iseq = (rb_iseq_t *)v;
invalidate_all_blocks_for_tracing(iseq);
}
asan_poison_object_if(ptr, v);
}
return 0;
}
static void
invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
{
struct rb_iseq_constant_body *body = iseq->body;
if (!body) return; // iseq yet to be initialized
ASSERT_vm_locking();
// Empty all blocks on the iseq so we don't compile new blocks that jump to the
// invalidted region.
// TODO Leaking the blocks for now since we might have situations where
// a different ractor is waiting in branch_stub_hit(). If we free the block
// that ractor can wake up with a dangling block.
rb_darray_for(body->yjit_blocks, version_array_idx) {
rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
rb_darray_for(version_array, version_idx) {
// Stop listening for invalidation events like basic operation redefinition.
block_t *block = rb_darray_get(version_array, version_idx);
yjit_unlink_method_lookup_dependency(block);
yjit_block_assumptions_free(block);
}
rb_darray_free(version_array);
}
rb_darray_free(body->yjit_blocks);
body->yjit_blocks = NULL;
#if USE_MJIT
// Reset output code entry point
body->jit_func = NULL;
#endif
}
static void
yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
{
@ -3749,6 +3976,9 @@ yjit_init_codegen(void)
// Generate the interpreter exit code for leave
leave_exit_code = yjit_gen_leave_exit(cb);
// Generate full exit code for C func
gen_full_cfunc_return();
// Map YARV opcodes to the corresponding codegen functions
yjit_reg_op(BIN(nop), gen_nop);
yjit_reg_op(BIN(dup), gen_dup);

Просмотреть файл

@ -7,6 +7,7 @@
// Code blocks we generate code into
extern codeblock_t *cb;
extern codeblock_t *ocb;
extern uint32_t yjit_codepage_frozen_bytes;
// Code generation state
typedef struct JITState
@ -30,6 +31,10 @@ typedef struct JITState
// This allows us to peek at run-time values
rb_execution_context_t* ec;
// Whether we need to record the code address at
// the end of this bytecode instruction for tracing support
bool record_boundary_patch_point;
} jitstate_t;
typedef enum codegen_status {

Просмотреть файл

@ -506,11 +506,12 @@ static size_t get_num_versions(blockid_t blockid)
static void
add_block_version(blockid_t blockid, block_t* block)
{
// Function entry blocks must have stack size 0
RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
const rb_iseq_t *iseq = block->blockid.iseq;
struct rb_iseq_constant_body *body = iseq->body;
// Function entry blocks must have stack size 0
RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
// Ensure yjit_blocks is initialized for this iseq
if (rb_darray_size(body->yjit_blocks) == 0) {
// Initialize yjit_blocks to be as wide as body->iseq_encoded
@ -772,7 +773,7 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex
// If this block hasn't yet been compiled
if (!p_block) {
// If the new block can be generated right after the branch (at cb->write_pos)
if (cb->write_pos == branch->end_pos) {
if (cb->write_pos == branch->end_pos && branch->start_pos >= yjit_codepage_frozen_bytes) {
// This branch should be terminating its block
RUBY_ASSERT(branch->end_pos == branch->block->end_pos);
@ -801,12 +802,14 @@ branch_stub_hit(branch_t* branch, const uint32_t target_idx, rb_execution_contex
branch->dst_addrs[target_idx] = dst_addr;
// Rewrite the branch with the new jump target address
RUBY_ASSERT(branch->dst_addrs[0] != NULL);
uint32_t cur_pos = cb->write_pos;
cb_set_pos(cb, branch->start_pos);
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
RUBY_ASSERT(cb->write_pos == branch->end_pos && "branch can't change size");
cb_set_pos(cb, cur_pos);
if (branch->start_pos >= yjit_codepage_frozen_bytes) {
RUBY_ASSERT(branch->dst_addrs[0] != NULL);
uint32_t cur_pos = cb->write_pos;
cb_set_pos(cb, branch->start_pos);
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
RUBY_ASSERT(cb->write_pos == branch->end_pos && "branch can't change size");
cb_set_pos(cb, cur_pos);
}
// Mark this branch target as patched (no longer a stub)
branch->blocks[target_idx] = p_block;
@ -921,8 +924,7 @@ void gen_direct_jump(
block_t* p_block = find_block_version(target0, ctx);
// If the version already exists
if (p_block)
{
if (p_block) {
rb_darray_append(&p_block->incoming, branch);
branch->dst_addrs[0] = cb_get_ptr(cb, p_block->start_pos);
@ -934,10 +936,9 @@ void gen_direct_jump(
gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
branch->end_pos = cb->write_pos;
}
else
{
// The target block will be compiled right after this one (fallthrough)
// See the loop in gen_block_version()
else {
// This NULL target address signals gen_block_version() to compile the
// target block right after this one (fallthrough).
branch->dst_addrs[0] = NULL;
branch->shape = SHAPE_NEXT0;
branch->start_pos = cb->write_pos;
@ -1048,7 +1049,7 @@ block_array_remove(rb_yjit_block_array_t block_array, block_t *block)
// Invalidate one specific block version
void
invalidate_block_version(block_t* block)
invalidate_block_version(block_t *block)
{
ASSERT_vm_locking();
// TODO: want to assert that all other ractors are stopped here. Can't patch
@ -1067,8 +1068,7 @@ invalidate_block_version(block_t* block)
uint8_t* code_ptr = cb_get_ptr(cb, block->start_pos);
// For each incoming branch
rb_darray_for(block->incoming, incoming_idx)
{
rb_darray_for(block->incoming, incoming_idx) {
branch_t* branch = rb_darray_get(block->incoming, incoming_idx);
uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
RUBY_ASSERT(branch->dst_addrs[target_idx] == code_ptr);
@ -1077,6 +1077,11 @@ invalidate_block_version(block_t* block)
// Mark this target as being a stub
branch->blocks[target_idx] = NULL;
// Don't patch frozen code region
if (branch->start_pos < yjit_codepage_frozen_bytes) {
continue;
}
// Create a stub for this branch target
branch->dst_addrs[target_idx] = get_branch_target(
block->blockid,
@ -1088,8 +1093,7 @@ invalidate_block_version(block_t* block)
// Check if the invalidated block immediately follows
bool target_next = block->start_pos == branch->end_pos;
if (target_next)
{
if (target_next) {
// The new block will no longer be adjacent
branch->shape = SHAPE_DEFAULT;
}
@ -1103,8 +1107,13 @@ invalidate_block_version(block_t* block)
branch->block->end_pos = cb->write_pos;
cb_set_pos(cb, cur_pos);
if (target_next && branch->end_pos > block->end_pos)
{
if (target_next && branch->end_pos > block->end_pos) {
fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%d block_size=%d\n",
branch->block->blockid.idx,
block->blockid.idx,
branch->end_pos - block->end_pos,
block->end_pos - block->start_pos);
yjit_print_iseq(branch->block->blockid.iseq);
rb_bug("yjit invalidate rewrote branch past end of invalidated block");
}
}

Просмотреть файл

@ -81,6 +81,17 @@ map_addr2insn(void *code_ptr, int insn)
}
}
// Debugging aid: write the disassembly of `iseq` to stderr, followed by a newline.
void
yjit_print_iseq(const rb_iseq_t *iseq)
{
    VALUE disasm_str = rb_iseq_disasm(iseq);

    // Pull the raw bytes and length out of the Ruby string
    char *text;
    long text_len;
    RSTRING_GETMEM(disasm_str, text, text_len);

    fprintf(stderr, "%.*s\n", (int)text_len, text);
}
int
yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc)
{

Просмотреть файл

@ -54,6 +54,7 @@ YJIT_DECLARE_COUNTERS(
send_cfunc_ruby_array_varg,
send_cfunc_argc_mismatch,
send_cfunc_toomany_args,
send_cfunc_tracing,
send_iseq_tailcall,
send_iseq_arity_error,
send_iseq_only_keywords,
@ -63,6 +64,8 @@ YJIT_DECLARE_COUNTERS(
send_se_cf_overflow,
send_se_protected_check_failed,
traced_cfunc_return,
leave_se_interrupt,
leave_interp_return,
leave_start_pc_non_zero,
@ -105,6 +108,7 @@ RUBY_EXTERN struct rb_yjit_runtime_counters yjit_runtime_counters;
void yjit_map_addr2insn(void *code_ptr, int insn);
VALUE *yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx);
int yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
void yjit_print_iseq(const rb_iseq_t *iseq);
void check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_callable_method_entry_t *compile_time_cme);