YJIT: Make block invalidation more robust

This commit adds an entry_exit field to block_t for use in
invalidate_block_version(). By patching the start of the block while
invalidating it, invalidate_block_version() can function correctly
even when there is no executable memory left for new branch stubs.
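
In outline, invalidation overwrites the first bytes of the block with a
jump to its pre-generated exit, so stale references into the block fall
back to the interpreter without allocating any new code. A minimal
sketch of the idea, built from the codeblock helpers that appear in the
yjit_core.c hunks below (patch_block_entry is a hypothetical name; the
frozen-region check and the exit-on-entry special case are omitted):

// Sketch: redirect a block's entry to its interpreter exit.
static void
patch_block_entry(codeblock_t *cb, block_t *block)
{
    uint32_t saved_pos = cb->write_pos;      // remember the codegen cursor
    cb_set_write_ptr(cb, block->start_addr); // seek to the block's first byte
    jmp_ptr(cb, block->entry_exit);          // overwrite it with a jump to the exit
    cb_set_pos(cb, saved_pos);               // restore the cursor
}

Since the exit was generated when the block was compiled, this patch
consumes no fresh executable memory at invalidation time.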

This change additionally fixes correctness in situations where we
cannot patch incoming jumps to the invalidated block. In situations
such as Shopify/yjit#226, the address of the start of the block
is saved and used later, possibly after the block is invalidated.

The assume_* family of functions now generates block->entry_exit before
remembering blocks for invalidation.
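
Condensed, each of these helpers now follows one shape
(assume_some_invariant, some_invariant_still_holds, and
some_dependency_table are hypothetical stand-ins; the real versions are
in the yjit_iface.c hunks below):

// Materialize the entry exit before recording the block, so the block
// remains invalidatable even if executable memory runs out later.
static bool
assume_some_invariant(jitstate_t *jit)
{
    if (!some_invariant_still_holds()) return false; // hypothetical check
    jit_ensure_block_entry_exit(jit);
    st_insert(some_dependency_table, (st_data_t)jit->block, 1); // hypothetical table
    return true;
}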

RubyVM::YJIT.simulate_oom! is introduced for testing out-of-memory
conditions. The test for it is disabled for now because OOM triggers
other failure conditions not addressed by this commit.
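
A minimal usage sketch (per simulate_oom_bang in yjit_iface.c below,
the primitive only takes effect in debug builds):

RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
# Code pages are now exhausted: subsequent invalidations must fall back
# to each block's entry_exit instead of allocating fresh branch stubs.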

Fixes Shopify/yjit#226
Alan Wu 2021-11-04 12:30:30 -04:00
Parent e42f994f6b
Commit 13d1ded253
7 changed files with 233 additions and 59 deletions

bootstraptest/test_yjit.rb

@@ -2381,3 +2381,77 @@ assert_equal '{:foo=>2}', %q{
foo
foo
}
# block invalidation edge case
assert_equal 'undef', %q{
class A
def foo(arg)
arg.times { A.remove_method(:bar) }
self
end
def bar
4
end
def use(arg)
# two consecutive sends. When bar is removed, the return address
# for calling it is already on foo's control frame
foo(arg).bar
rescue NoMethodError
:undef
end
end
A.new.use 0
A.new.use 0
A.new.use 1
}
# block invalidation edge case
assert_equal 'ok', %q{
class A
Good = :ng
def foo(arg)
arg.times { A.const_set(:Good, :ok) }
self
end
def id(arg)
arg
end
def use(arg)
# send followed by an opt_getinlinecache.
# The return address remains on the control frame
# when opt_getinlinecache is invalidated.
foo(arg).id(Good)
end
end
A.new.use 0
A.new.use 0
A.new.use 1
}
# block invalidation while out of memory
assert_equal 'new', %q{
def foo
:old
end
def test
foo
end
test
test
RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
def foo
:new
end
test
} if false # disabled for now since OOM crashes in the test harness

yjit.rb

@@ -149,6 +149,10 @@ module RubyVM::YJIT
Primitive.cexpr! 'rb_yjit_enabled_p() ? Qtrue : Qfalse'
end
def self.simulate_oom!
Primitive.simulate_oom_bang
end
# Avoid calling a method here to not interfere with compilation tests
if Primitive.yjit_stats_enabled_p
at_exit { _print_stats }

yjit_codegen.c

@@ -405,6 +405,26 @@ yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
return jit->side_exit_for_pc;
}
// Ensure that there is an exit for the start of the block being compiled.
// Block invalidation uses this exit.
static void
jit_ensure_block_entry_exit(jitstate_t *jit)
{
block_t *block = jit->block;
if (block->entry_exit) return;
if (jit->insn_idx == block->blockid.idx) {
// We are compiling the first instruction in the block.
// Generate the exit with the cache in jitstate.
block->entry_exit = yjit_side_exit(jit, &block->ctx);
}
else {
VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
block->entry_exit = cb_get_ptr(ocb, pos);
}
}
// Generate a runtime guard that ensures the PC is at the start of the iseq,
// otherwise take a side exit. This is to handle the situation of optional
// parameters. When a function with optional parameters is called, the entry
@@ -630,7 +650,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
// opt_getinlinecache wants to be in a block all on its own. Cut the block short
- // if we run into it. See gen_opt_getinlinecache for details.
+ // if we run into it. See gen_opt_getinlinecache() for details.
if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
jit_jump_to_next_insn(&jit, ctx);
break;
@@ -657,43 +677,45 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
// Lookup the codegen function for this instruction
codegen_fn gen_fn = gen_fns[opcode];
- if (!gen_fn) {
-     // If we reach an unknown instruction,
-     // exit to the interpreter and stop compiling
-     yjit_gen_exit(jit.pc, ctx, cb);
-     break;
+ codegen_status_t status = YJIT_CANT_COMPILE;
+ if (gen_fn) {
+     if (0) {
+         fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
+         print_str(cb, insn_name(opcode));
+     }
+     // :count-placement:
+     // Count bytecode instructions that execute in generated code.
+     // Note that the increment happens even when the output takes a side exit.
+     GEN_COUNTER_INC(cb, exec_instruction);
+     // Add a comment for the name of the YARV instruction
+     ADD_COMMENT(cb, insn_name(opcode));
+     // Call the code generation function
+     status = gen_fn(&jit, ctx, cb);
}
- if (0) {
-     fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
-     print_str(cb, insn_name(opcode));
- }
- // :count-placement:
- // Count bytecode instructions that execute in generated code.
- // Note that the increment happens even when the output takes a side exit.
- GEN_COUNTER_INC(cb, exec_instruction);
- // Add a comment for the name of the YARV instruction
- ADD_COMMENT(cb, insn_name(opcode));
- // Call the code generation function
- codegen_status_t status = gen_fn(&jit, ctx, cb);
+ // For now, reset the chain depth after each instruction as only the
+ // first instruction in the block can concern itself with the depth.
+ ctx->chain_depth = 0;
// If we can't compile this instruction
// exit to the interpreter and stop compiling
if (status == YJIT_CANT_COMPILE) {
    // TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
    // the exit this generates would be wrong. We could save a copy of the entry context
    // and assert that ctx is the same here.
-     yjit_gen_exit(jit.pc, ctx, cb);
+     uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
+     // If this is the first instruction in the block, then we can use
+     // the exit for block->entry_exit.
+     if (insn_idx == block->blockid.idx) {
+         block->entry_exit = cb_get_ptr(cb, exit_off);
+     }
    break;
}
- // For now, reset the chain depth after each instruction as only the
- // first instruction in the block can concern itself with the depth.
- ctx->chain_depth = 0;
// Move to the next instruction to compile
insn_idx += insn_len(opcode);
@@ -1971,7 +1993,7 @@ gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
// Note: we generate the side-exit before popping operands from the stack
uint8_t *side_exit = yjit_side_exit(jit, ctx);
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
return YJIT_CANT_COMPILE;
}
@@ -2036,7 +2058,7 @@ gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
// if overridden, emit the generic version
return false;
}
@@ -2059,7 +2081,7 @@ gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
}
else if (CLASS_OF(comptime_a) == rb_cString &&
CLASS_OF(comptime_b) == rb_cString) {
- if (!assume_bop_not_redefined(jit->block, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
+ if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
// if overridden, emit the generic version
return false;
}
@@ -2164,7 +2186,7 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
uint8_t *side_exit = yjit_side_exit(jit, ctx);
if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
- if (!assume_bop_not_redefined(jit->block, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
+ if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
return YJIT_CANT_COMPILE;
}
@@ -2212,7 +2234,7 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
return YJIT_END_BLOCK;
}
else if (CLASS_OF(comptime_recv) == rb_cHash) {
- if (!assume_bop_not_redefined(jit->block, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
+ if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
return YJIT_CANT_COMPILE;
}
@@ -2347,7 +2369,7 @@ gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
// Note: we generate the side-exit before popping operands from the stack
uint8_t *side_exit = yjit_side_exit(jit, ctx);
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
return YJIT_CANT_COMPILE;
}
@@ -2391,7 +2413,7 @@ gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
// Note: we generate the side-exit before popping operands from the stack
uint8_t *side_exit = yjit_side_exit(jit, ctx);
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
return YJIT_CANT_COMPILE;
}
@@ -2435,7 +2457,7 @@ gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
// Note: we generate the side-exit before popping operands from the stack
uint8_t *side_exit = yjit_side_exit(jit, ctx);
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
return YJIT_CANT_COMPILE;
}
@@ -2481,7 +2503,7 @@ gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
// Note: we generate the side-exit before popping operands from the stack
uint8_t *side_exit = yjit_side_exit(jit, ctx);
- if (!assume_bop_not_redefined(jit->block, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
+ if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
return YJIT_CANT_COMPILE;
}
@@ -2579,7 +2601,7 @@ gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
static codegen_status_t
gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
{
- if (!assume_bop_not_redefined(jit->block, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
+ if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
return YJIT_CANT_COMPILE;
}
@@ -2596,7 +2618,7 @@ gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
static codegen_status_t
gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
{
- if (!assume_bop_not_redefined(jit->block, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
+ if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
return YJIT_CANT_COMPILE;
}
@@ -3965,7 +3987,7 @@ gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t
// Register block for invalidation
RUBY_ASSERT(cme->called_id == mid);
- assume_method_lookup_stable(comptime_recv_klass, cme, jit->block);
+ assume_method_lookup_stable(comptime_recv_klass, cme, jit);
// To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
while (true) {
@@ -4191,8 +4213,8 @@ gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
// We need to assume that both our current method entry and the super
// method entry we invoke remain stable
- assume_method_lookup_stable(current_defined_class, me, jit->block);
- assume_method_lookup_stable(comptime_superclass, cme, jit->block);
+ assume_method_lookup_stable(current_defined_class, me, jit);
+ assume_method_lookup_stable(comptime_superclass, cme, jit);
// Method calls may corrupt types
ctx_clear_local_types(ctx);
@@ -4482,6 +4504,10 @@ gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
return YJIT_CANT_COMPILE;
}
// Make sure there is an exit for this block as the interpreter might want
// to invalidate this block from yjit_constant_ic_update().
jit_ensure_block_entry_exit(jit);
if (ice->ic_cref) {
// Cache is keyed on a certain lexical scope. Use the interpreter's cache.
uint8_t *side_exit = yjit_side_exit(jit, ctx);
@@ -4506,11 +4532,11 @@ gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
else {
// Optimize for single ractor mode.
// FIXME: This leaks when st_insert raises NoMemoryError
- if (!assume_single_ractor_mode(jit->block)) return YJIT_CANT_COMPILE;
+ if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
// Invalidate output code on any and all constant writes
// FIXME: This leaks when st_insert raises NoMemoryError
- assume_stable_global_constant_state(jit->block);
+ assume_stable_global_constant_state(jit);
val_type_t type = yjit_type_of_value(ice->value);
x86opnd_t stack_top = ctx_stack_push(ctx, type);
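
Note the ordering above: gen_opt_getinlinecache calls
jit_ensure_block_entry_exit() before registering any assumption,
because a constant write can invalidate the block from the interpreter
at any time after compilation. Roughly, that invalidation path is an
st_foreach walk over the dependency table; a sketch under that
assumption (the callback name is hypothetical):

// On a constant write, every block that assumed stable constant state
// is invalidated and falls back to its pre-generated entry exit.
static int
invalidate_dependent_block_i(st_data_t block, st_data_t value, st_data_t arg)
{
    invalidate_block_version((block_t *)block);
    return ST_CONTINUE;
}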

yjit_codegen.h

@@ -10,6 +10,8 @@ typedef enum codegen_status {
// Code generation function signature
typedef codegen_status_t (*codegen_fn)(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
static void jit_ensure_block_entry_exit(jitstate_t *jit);
static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);

yjit_core.c

@@ -884,8 +884,7 @@ get_branch_target(
block_t *p_block = find_block_version(target, ctx);
// If the block already exists
- if (p_block)
- {
+ if (p_block) {
// Add an incoming branch for this version
rb_darray_append(&p_block->incoming, branch);
branch->blocks[target_idx] = p_block;
@@ -894,12 +893,18 @@ get_branch_target(
return p_block->start_addr;
}
// Do we have enough memory for a stub?
const long MAX_CODE_SIZE = 64;
if (ocb->write_pos + MAX_CODE_SIZE >= cb->mem_size) {
return NULL;
}
// Generate an outlined stub that will call branch_stub_hit()
uint8_t *stub_addr = cb_get_ptr(ocb, ocb->write_pos);
// Call branch_stub_hit(branch_idx, target_idx, ec)
mov(ocb, C_ARG_REGS[2], REG_EC);
mov(ocb, C_ARG_REGS[1], imm_opnd(target_idx));
mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch));
call_ptr(ocb, REG0, (void *)&branch_stub_hit);
@@ -907,6 +912,8 @@ get_branch_target(
// branch_stub_hit call
jmp_rm(ocb, RAX);
RUBY_ASSERT(cb_get_ptr(ocb, ocb->write_pos) - stub_addr <= MAX_CODE_SIZE);
return stub_addr;
}
@@ -1116,6 +1123,29 @@ invalidate_block_version(block_t *block)
// Get a pointer to the generated code for this block
uint8_t *code_ptr = block->start_addr;
// Make the start of the block do an exit. This handles OOM situations
// and some cases where we can't efficiently patch incoming branches.
// Do this first, since in case there is a fallthrough branch into this
// block, the patching loop below can overwrite the start of the block.
// In those situations, there are hopefully no jumps to the start of the block
// after patching, as the start of the block would be in the middle of something
// generated by branch_t::gen_fn.
{
RUBY_ASSERT_ALWAYS(block->entry_exit && "block invalidation requires an exit");
if (block->entry_exit == block->start_addr) {
// Some blocks exit on entry. Patching a jump to the entry at the
// entry makes an infinite loop.
}
else if (block->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { // Don't patch frozen code region
// Patch in a jump to block->entry_exit.
uint32_t cur_pos = cb->write_pos;
cb_set_write_ptr(cb, block->start_addr);
jmp_ptr(cb, block->entry_exit);
RUBY_ASSERT_ALWAYS(cb_get_ptr(cb, cb->write_pos) < block->end_addr && "invalidation wrote past end of block");
cb_set_pos(cb, cur_pos);
}
}
// For each incoming branch
rb_darray_for(block->incoming, incoming_idx) {
branch_t *branch = rb_darray_get(block->incoming, incoming_idx);
@@ -1132,18 +1162,31 @@ invalidate_block_version(block_t *block)
}
// Create a stub for this branch target
- branch->dst_addrs[target_idx] = get_branch_target(
+ uint8_t *branch_target = get_branch_target(
block->blockid,
&block->ctx,
branch,
target_idx
);
if (!branch_target) {
// We were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
// still patch the branch in this situation so stubs are unique
// to branches. Think about what could go wrong if we run out of
// memory in the middle of this loop.
branch_target = block->entry_exit;
}
branch->dst_addrs[target_idx] = branch_target;
// Check if the invalidated block immediately follows
bool target_next = (block->start_addr == branch->end_addr);
if (target_next) {
- // The new block will no longer be adjacent
+ // The new block will no longer be adjacent.
+ // Note that we could be enlarging the branch and writing into the
+ // start of the block being invalidated.
branch->shape = SHAPE_DEFAULT;
}
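
One case in invalidate_block_version() above deserves a concrete
picture: for a block that exits as its very first action, entry_exit
aliases start_addr, so patching would emit a one-instruction infinite
loop. With made-up addresses:

// Hypothetical block that exits on entry:
//   block->start_addr == block->entry_exit == 0x5500
// Patching would produce:
//   0x5500: jmp 0x5500   ; spins forever
// Such blocks are left untouched; they already exit to the interpreter
// as their first action.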

yjit_core.h

@@ -241,8 +241,8 @@ typedef struct yjit_block_version
ctx_t ctx;
// Positions where the generated code starts and ends
- uint8_t* start_addr;
- uint8_t* end_addr;
+ uint8_t *start_addr;
+ uint8_t *end_addr;
// List of incoming branches (from predecessors)
branch_array_t incoming;
@@ -258,6 +258,10 @@ typedef struct yjit_block_version
// block in the system.
cme_dependency_array_t cme_dependencies;
// Code address of an exit for `ctx` and `blockid`. Used for block
// invalidation.
uint8_t *entry_exit;
// Index one past the last instruction in the iseq
uint32_t end_idx;

yjit_iface.c

@@ -115,12 +115,13 @@ struct yjit_root_struct {
static st_table *blocks_assuming_bops;
static bool
- assume_bop_not_redefined(block_t *block, int redefined_flag, enum ruby_basic_operators bop)
+ assume_bop_not_redefined(jitstate_t *jit, int redefined_flag, enum ruby_basic_operators bop)
{
if (BASIC_OP_UNREDEFINED_P(bop, redefined_flag)) {
- if (blocks_assuming_bops) {
-     st_insert(blocks_assuming_bops, (st_data_t)block, 0);
- }
+ RUBY_ASSERT(blocks_assuming_bops);
+ jit_ensure_block_entry_exit(jit);
+ st_insert(blocks_assuming_bops, (st_data_t)jit->block, 0);
return true;
}
else {
@@ -206,7 +207,7 @@ add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int ex
//
// @raise NoMemoryError
static void
- assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry_t *cme, block_t *block)
+ assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry_t *cme, jitstate_t *jit)
{
RUBY_ASSERT(cme_validity_dependency);
RUBY_ASSERT(method_lookup_dependency);
@@ -214,6 +215,10 @@ assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry
RUBY_ASSERT_ALWAYS(RB_TYPE_P(receiver_klass, T_CLASS) || RB_TYPE_P(receiver_klass, T_ICLASS));
RUBY_ASSERT_ALWAYS(!rb_objspace_garbage_object_p(receiver_klass));
jit_ensure_block_entry_exit(jit);
block_t *block = jit->block;
cme_dependency_t cme_dep = { receiver_klass, (VALUE)cme };
rb_darray_append(&block->cme_dependencies, cme_dep);
@@ -228,10 +233,13 @@ static st_table *blocks_assuming_single_ractor_mode;
// Can raise NoMemoryError.
RBIMPL_ATTR_NODISCARD()
static bool
- assume_single_ractor_mode(block_t *block) {
+ assume_single_ractor_mode(jitstate_t *jit)
+ {
if (rb_multi_ractor_p()) return false;
- st_insert(blocks_assuming_single_ractor_mode, (st_data_t)block, 1);
+ jit_ensure_block_entry_exit(jit);
+ st_insert(blocks_assuming_single_ractor_mode, (st_data_t)jit->block, 1);
return true;
}
@@ -240,9 +248,10 @@ static st_table *blocks_assuming_stable_global_constant_state;
// Assume that the global constant state has not changed since call to this function.
// Can raise NoMemoryError.
static void
- assume_stable_global_constant_state(block_t *block)
+ assume_stable_global_constant_state(jitstate_t *jit)
{
- st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1);
+ jit_ensure_block_entry_exit(jit);
+ st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)jit->block, 1);
}
static int
@@ -819,6 +828,18 @@ reset_stats_bang(rb_execution_context_t *ec, VALUE self)
return Qnil;
}
// Primitive for yjit.rb. For testing running out of executable memory
static VALUE
simulate_oom_bang(rb_execution_context_t *ec, VALUE self)
{
if (RUBY_DEBUG && cb && ocb) {
// Only simulate in debug builds for paranoia.
cb_set_pos(cb, cb->mem_size-1);
cb_set_pos(ocb, ocb->mem_size-1);
}
return Qnil;
}
#include "yjit.rbinc"
#if YJIT_STATS