Mirror of https://github.com/github/ruby.git
YJIT: Add ability to exit to interpreter from stubs
Previously, YJIT assumed that it's always possible to generate a new
basic block when servicing a stub in branch_stub_hit(). When YJIT is out
of executable memory, for example, this assumption doesn't hold up.
Teach branch_stub_hit() to service stubs without consuming more
executable memory: a new code path exits to the interpreter at the
location the branch stub represents. The new code path reconstructs
interpreter state in branch_stub_hit() and then exits through a new
snippet called `code_for_exit_from_stub`, which pops the YJIT native
stack frame and returns `Qundef`.
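For reference, the exit snippet is tiny. Below is a condensed sketch of
what gen_code_for_exit_from_stub() emits, taken from the yjit_codegen.c
hunk further down (counter bookkeeping omitted):

```c
// The snippet undoes the register pushes done by the YJIT entry
// prologue, then returns Qundef so the caller falls back to the
// interpreter.
static void
gen_code_for_exit_from_stub(void)
{
    codeblock_t *cb = ocb;
    code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);

    pop(cb, REG_SP);                 // restore callee-saved registers
    pop(cb, REG_EC);                 // pushed by the entry prologue
    pop(cb, REG_CFP);

    mov(cb, RAX, imm_opnd(Qundef));  // return value for the YJIT frame
    ret(cb);                         // pop the native frame
}
```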
As this change adds another place where we regenerate code from
`branch_t`, extract the logic for it into a new function and call it
regenerate_branch(). While we are at it, make the branch shrinking code
path in branch_stub_hit() more explicit.
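To illustrate the consolidation, here is the hand-rolled rewrite
sequence that used to be repeated at each call site, and the single call
that replaces it (a sketch based on the yjit_core.c hunks below):

```c
// Before: each site rewound the write cursor, re-ran the branch's
// generation function, and patched end_addr bookkeeping by hand.
uint32_t cur_pos = cb->write_pos;
cb_set_write_ptr(cb, branch->start_addr);
branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
branch->end_addr = cb_get_write_ptr(cb);
cb_set_pos(cb, cur_pos);

// After: one helper owns the rewind/regenerate/bookkeeping dance,
// including the frozen-bytes check and block end_addr adjustment.
regenerate_branch(cb, branch);
```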
This new functionality is hard to test without full support for
out-of-memory conditions. To verify this change, I ran
`RUBY_YJIT_ENABLE=1 make check -j12` with the following patch to
stress-test the new code path:
```diff
diff --git a/yjit_core.c b/yjit_core.c
index 4ab63d9806..5788b8c5ed 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -878,8 +878,12 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex
cb_set_write_ptr(cb, branch->end_addr);
}
+if (rand() < RAND_MAX/2) {
// Compile the new block version
p_block = gen_block_version(target, target_ctx, ec);
+}else{
+ p_block = NULL;
+}
if (!p_block && branch_modified) {
// We couldn't generate a new block for the branch, but we modified the branch.
```
We can enable the new test along with other OOM tests once full support
lands.
Other small changes:
* yjit_utils.c (print_str): Update to work with new native frame shape.
  Follow-up for 8fa0ee4d40.
* yjit_iface.c (rb_yjit_init): Run yjit_init_core() after
yjit_init_codegen() so `cb` and `ocb` are available.
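A condensed view of that ordering change (sketch; see the yjit_iface.c
hunk below):

```c
// yjit_init_core() now generates code (code_for_exit_from_stub),
// so the code blocks must be allocated first.
yjit_init_codegen();  // sets up cb and ocb
yjit_init_core();     // uses ocb in gen_code_for_exit_from_stub()
```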
Parent: 59439446d9
Commit: b5b6ab4194

```diff
diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2434,6 +2434,24 @@ assert_equal 'ok', %q{
   A.new.use 1
 }
 
+assert_equal 'ok', %q{
+  # test hitting a branch stub when out of memory
+  def nimai(jita)
+    if jita
+      :ng
+    else
+      :ok
+    end
+  end
+
+  nimai(true)
+  nimai(true)
+
+  RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+  nimai(false)
+} if false # disabled for now since OOM crashes in the test harness
+
 # block invalidation while out of memory
 assert_equal 'new', %q{
   def foo
```
yjit.c
2
yjit.c
|
@ -123,6 +123,8 @@ YJIT_DECLARE_COUNTERS(
|
|||
compiled_iseq_count,
|
||||
compiled_block_count,
|
||||
|
||||
exit_from_branch_stub,
|
||||
|
||||
invalidation_count,
|
||||
invalidate_method_lookup,
|
||||
invalidate_bop_redefined,
|
||||
|
|
|

```diff
diff --git a/yjit_codegen.c b/yjit_codegen.c
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -382,6 +382,26 @@ yjit_gen_leave_exit(codeblock_t *cb)
     return code_ptr;
 }
 
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+static void
+gen_code_for_exit_from_stub(void)
+{
+    codeblock_t *cb = ocb;
+    code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
+
+    GEN_COUNTER_INC(cb, exit_from_branch_stub);
+
+    pop(cb, REG_SP);
+    pop(cb, REG_EC);
+    pop(cb, REG_CFP);
+
+    mov(cb, RAX, imm_opnd(Qundef));
+    ret(cb);
+}
+
 // :side-exit:
 // Get an exit for the current instruction in the outlined block. The code
 // for each instruction often begins with several guards before proceeding
```

```diff
diff --git a/yjit_codegen.h b/yjit_codegen.h
--- a/yjit_codegen.h
+++ b/yjit_codegen.h
@@ -16,6 +16,8 @@ static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
 
 static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);
 
+static void gen_code_for_exit_from_stub(void);
+
 static void yjit_init_codegen(void);
 
 #endif // #ifndef YJIT_CODEGEN_H
```

```diff
diff --git a/yjit_core.c b/yjit_core.c
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -9,6 +9,10 @@
 #include "yjit_core.h"
 #include "yjit_codegen.h"
 
+// For exiting from YJIT frame from branch_stub_hit().
+// Filled by gen_code_for_exit_from_stub().
+static uint8_t *code_for_exit_from_stub = NULL;
+
 /*
 Get an operand for the adjusted stack pointer address
 */
@@ -597,6 +601,52 @@ add_block_version(blockid_t blockid, block_t *block)
     #endif
 }
 
+static ptrdiff_t
+branch_code_size(const branch_t *branch)
+{
+    return branch->end_addr - branch->start_addr;
+}
+
+// Generate code for a branch, possibly rewriting and changing the size of it
+static void
+regenerate_branch(codeblock_t *cb, branch_t *branch)
+{
+    if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+        // Generating this branch would modify frozen bytes. Do nothing.
+        return;
+    }
+
+    const uint32_t old_write_pos = cb->write_pos;
+    const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;
+
+    RUBY_ASSERT(branch->dst_addrs[0] != NULL);
+
+    cb_set_write_ptr(cb, branch->start_addr);
+    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
+    branch->end_addr = cb_get_write_ptr(cb);
+
+    if (branch_terminates_block) {
+        // Adjust block size
+        branch->block->end_addr = branch->end_addr;
+    }
+
+    // cb->write_pos is both a write cursor and a marker for the end of
+    // everything written out so far. Leave cb->write_pos at the end of the
+    // block before returning. This function only ever bumps or retains the
+    // end of block marker since that's what the majority of callers want.
+    // When the branch sits at the very end of the codeblock and it shrinks
+    // after regeneration, it's up to the caller to drop bytes off the end
+    // to not leave a gap and implement branch->shape.
+    if (old_write_pos > cb->write_pos) {
+        // We rewound cb->write_pos to generate the branch, now restore it.
+        cb_set_pos(cb, old_write_pos);
+    }
+    else {
+        // The branch sits at the end of cb and consumed some memory.
+        // Keep cb->write_pos.
+    }
+}
+
 // Create a new outgoing branch entry for a block
 static branch_t*
 make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
@@ -777,13 +827,15 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t *ec)
 static uint8_t *
 branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
 {
-    uint8_t *dst_addr;
+    uint8_t *dst_addr = NULL;
 
     // Stop other ractors since we are going to patch machine code.
     // This is how the GC does it.
     RB_VM_LOCK_ENTER();
     rb_vm_barrier();
 
+    const ptrdiff_t branch_size_on_entry = branch_code_size(branch);
+
     RUBY_ASSERT(branch != NULL);
     RUBY_ASSERT(target_idx < 2);
     blockid_t target = branch->targets[target_idx];
@@ -794,18 +846,13 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
     if (branch->blocks[target_idx]) {
         dst_addr = branch->dst_addrs[target_idx];
     }
-    else
-    {
-        //fprintf(stderr, "\nstub hit, branch: %p, target idx: %d\n", branch, target_idx);
-        //fprintf(stderr, "blockid.iseq=%p, blockid.idx=%d\n", target.iseq, target.idx);
-        //fprintf(stderr, "chain_depth=%d\n", target_ctx->chain_depth);
-
+    else {
         // :stub-sp-flush:
         // Generated code do stack operations without modifying cfp->sp, while the
         // cfp->sp tells the GC what values on the stack to root. Generated code
         // generally takes care of updating cfp->sp when it calls runtime routines that
-        // could trigger GC, but for the case of branch stubs, it's inconvenient. So
-        // we do it here.
+        // could trigger GC, but it's inconvenient to do it before calling this function.
+        // So we do it here instead.
         VALUE *const original_interp_sp = ec->cfp->sp;
         ec->cfp->sp += target_ctx->sp_offset;
 
@@ -818,8 +865,11 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
 
         // If this block hasn't yet been compiled
         if (!p_block) {
+            const uint8_t branch_old_shape = branch->shape;
+            bool branch_modified = false;
+
             // If the new block can be generated right after the branch (at cb->write_pos)
-            if (cb_get_write_ptr(cb) == branch->end_addr && branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+            if (cb_get_write_ptr(cb) == branch->end_addr) {
                 // This branch should be terminating its block
                 RUBY_ASSERT(branch->end_addr == branch->block->end_addr);
 
@@ -827,43 +877,62 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
                 branch->shape = (uint8_t)target_idx;
 
                 // Rewrite the branch with the new, potentially more compact shape
-                cb_set_write_ptr(cb, branch->start_addr);
-                branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-                RUBY_ASSERT(cb_get_write_ptr(cb) <= branch->end_addr && "can't enlarge branches");
-                branch->end_addr = cb_get_write_ptr(cb);
-                branch->block->end_addr = cb_get_write_ptr(cb);
+                regenerate_branch(cb, branch);
+                branch_modified = true;
+
+                // Ensure that the branch terminates the codeblock just like
+                // before entering this if block. This drops bytes off the end
+                // in case we shrank the branch when regenerating.
+                cb_set_write_ptr(cb, branch->end_addr);
             }
 
             // Compile the new block version
             p_block = gen_block_version(target, target_ctx, ec);
-            RUBY_ASSERT(p_block);
+
+            if (!p_block && branch_modified) {
+                // We couldn't generate a new block for the branch, but we modified the branch.
+                // Restore the branch by regenerating it.
+                branch->shape = branch_old_shape;
+                regenerate_branch(cb, branch);
+            }
+        }
+
+        if (p_block) {
             // Branch shape should reflect layout
             RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));
-        }
 
-        // Add this branch to the list of incoming branches for the target
-        rb_darray_append(&p_block->incoming, branch);
+            // Add this branch to the list of incoming branches for the target
+            rb_darray_append(&p_block->incoming, branch);
 
-        // Update the branch target address
-        dst_addr = p_block->start_addr;
-        branch->dst_addrs[target_idx] = dst_addr;
+            // Update the branch target address
+            dst_addr = p_block->start_addr;
+            branch->dst_addrs[target_idx] = dst_addr;
 
-        // Rewrite the branch with the new jump target address
-        if (branch->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
-            RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-            uint32_t cur_pos = cb->write_pos;
-            cb_set_write_ptr(cb, branch->start_addr);
-            branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-            RUBY_ASSERT(cb_get_write_ptr(cb) == branch->end_addr && "branch can't change size");
-            cb_set_pos(cb, cur_pos);
-        }
-        // Mark this branch target as patched (no longer a stub)
-        branch->blocks[target_idx] = p_block;
+            // Mark this branch target as patched (no longer a stub)
+            branch->blocks[target_idx] = p_block;
+
+            // Rewrite the branch with the new jump target address
+            regenerate_branch(cb, branch);
 
-        // Restore interpreter sp, since the code hitting the stub expects the original.
-        ec->cfp->sp = original_interp_sp;
+            // Restore interpreter sp, since the code hitting the stub expects the original.
+            ec->cfp->sp = original_interp_sp;
+        }
+        else {
+            // Failed to service the stub by generating a new block so now we
+            // need to exit to the interpreter at the stubbed location. We are
+            // intentionally *not* restoring original_interp_sp. At the time of
+            // writing, reconstructing interpreter state only involves setting
+            // cfp->sp and cfp->pc. We set both before trying to generate the
+            // block. All there is left to do to exit is to pop the native
+            // frame. We do that in code_for_exit_from_stub.
+            dst_addr = code_for_exit_from_stub;
+        }
     }
 
+    const ptrdiff_t new_branch_size = branch_code_size(branch);
+    RUBY_ASSERT_ALWAYS(new_branch_size >= 0);
+    RUBY_ASSERT_ALWAYS(new_branch_size <= branch_size_on_entry && "branch stubs should not enlarge branches");
+
     RB_VM_LOCK_LEAVE();
 
     // Return a pointer to the compiled block version
@@ -942,8 +1011,7 @@ gen_branch(
 
     // Call the branch generation function
     branch->start_addr = cb_get_write_ptr(cb);
-    gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], SHAPE_DEFAULT);
-    branch->end_addr = cb_get_write_ptr(cb);
+    regenerate_branch(cb, branch);
 }
 
 static void
@@ -1191,13 +1259,7 @@ invalidate_block_version(block_t *block)
         }
 
         // Rewrite the branch with the new jump target address
-        RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-        uint32_t cur_pos = cb->write_pos;
-        cb_set_write_ptr(cb, branch->start_addr);
-        branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
-        branch->end_addr = cb_get_write_ptr(cb);
-        branch->block->end_addr = cb_get_write_ptr(cb);
-        cb_set_pos(cb, cur_pos);
+        regenerate_branch(cb, branch);
 
         if (target_next && branch->end_addr > block->end_addr) {
             fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%ld block_size=%ld\n",
@@ -1243,5 +1305,5 @@ invalidate_block_version(block_t *block)
 static void
 yjit_init_core(void)
 {
-    // Nothing yet
+    gen_code_for_exit_from_stub();
 }
```

```diff
diff --git a/yjit_core.h b/yjit_core.h
--- a/yjit_core.h
+++ b/yjit_core.h
@@ -192,8 +192,8 @@ typedef struct yjit_branch_entry
     struct yjit_block_version *block;
 
     // Positions where the generated code starts and ends
-    uint8_t* start_addr;
-    uint8_t* end_addr;
+    uint8_t *start_addr;
+    uint8_t *end_addr;
 
     // Context right after the branch instruction
     ctx_t src_ctx;
@@ -204,7 +204,7 @@ typedef struct yjit_branch_entry
     struct yjit_block_version *blocks[2];
 
     // Jump target addresses
-    uint8_t* dst_addrs[2];
+    uint8_t *dst_addrs[2];
 
     // Branch code generation function
     branchgen_fn gen_fn;
```

```diff
diff --git a/yjit_iface.c b/yjit_iface.c
--- a/yjit_iface.c
+++ b/yjit_iface.c
@@ -1232,8 +1232,7 @@ rb_yjit_init(struct rb_yjit_options *options)
     }
 
     // If type propagation is disabled, max 1 version per block
-    if (rb_yjit_opts.no_type_prop)
-    {
+    if (rb_yjit_opts.no_type_prop) {
         rb_yjit_opts.max_versions = 1;
     }
 
@@ -1241,8 +1240,8 @@ rb_yjit_init(struct rb_yjit_options *options)
     blocks_assuming_single_ractor_mode = st_init_numtable();
     blocks_assuming_bops = st_init_numtable();
 
-    yjit_init_core();
     yjit_init_codegen();
+    yjit_init_core();
 
     // YJIT Ruby module
     mYjit = rb_define_module_under(rb_cRubyVM, "YJIT");
```

```diff
diff --git a/yjit_utils.c b/yjit_utils.c
--- a/yjit_utils.c
+++ b/yjit_utils.c
@@ -101,13 +101,9 @@ print_str(codeblock_t *cb, const char *str)
         cb_write_byte(cb, (uint8_t)str[i]);
     cb_write_byte(cb, 0);
 
-    push(cb, RSP); // Alignment
-
     // Call the print function
     mov(cb, RAX, const_ptr_opnd((void*)&print_str_cfun));
     call(cb, RAX);
 
-    pop(cb, RSP); // Alignment
-
     pop_regs(cb);
 }
```