YJIT: Deallocate `struct Block` to plug memory leaks

Previously we essentially never freed blocks even after invalidation.
Their reference counts never reached zero for a couple of reasons:
1. `Branch::block` formed a cycle with the block holding the branch
2. The strong count on a branch that has ever contained a stub never
   reached 0 because the increment from the `.clone()` call made for
   `BranchRef::into_raw()` didn't have a matching decrement.

It's not safe to immediately deallocate blocks during
invalidation since `branch_stub_hit()` can end up
running with a branch pointer from an invalidated branch.
To plug the leaks, we wait until code GC or global invalidation and
deallocate the blocks for iseqs that are definitely not running.
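
To make the two leaks concrete, here is a minimal standalone Rust sketch
(not YJIT code; `Block`, `Branch`, and their fields below are simplified
stand-ins for the real types):

    use std::cell::RefCell;
    use std::rc::Rc;

    // Stand-ins for YJIT's Block/Branch; the fields are illustrative only.
    struct Block {
        outgoing: RefCell<Vec<Rc<Branch>>>,
    }
    struct Branch {
        block: RefCell<Option<Rc<Block>>>,
    }

    fn main() {
        // Leak 1: a strong reference cycle. The branch points back at the
        // block housing it, so neither strong count can reach zero on its own.
        let block = Rc::new(Block { outgoing: RefCell::new(vec![]) });
        let branch = Rc::new(Branch { block: RefCell::new(Some(Rc::clone(&block))) });
        block.outgoing.borrow_mut().push(Rc::clone(&branch));
        assert_eq!(Rc::strong_count(&block), 2);  // local handle + cycle edge
        assert_eq!(Rc::strong_count(&branch), 2); // local handle + cycle edge

        // Leak 2: Rc::into_raw hands out one strong count that is only given
        // back by a matching Rc::from_raw or decrement_strong_count.
        let raw = Rc::into_raw(Rc::clone(&branch));
        unsafe { Rc::decrement_strong_count(raw) }; // without this, the clone leaks
        assert_eq!(Rc::strong_count(&branch), 2);

        // Breaking the cycle explicitly is what lets the counts fall to zero
        // once the last local handles drop.
        branch.block.borrow_mut().take();
        block.outgoing.borrow_mut().clear();
        assert_eq!(Rc::strong_count(&block), 1);
        assert_eq!(Rc::strong_count(&branch), 1);
    }
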
Alan Wu 2022-11-29 16:14:13 -05:00 (committed by Maxime Chevalier-Boisvert)
Parent b30248f74a
Commit a0b0365e90
3 changed files with 84 additions and 27 deletions


@@ -3423,3 +3423,16 @@ assert_equal '1', %q{
bar { }
bar { }
}
# test for return stub lifetime issue
assert_equal '1', %q{
  def foo(n)
    if n == 2
      return 1.times { Object.define_method(:foo) {} }
    end
    foo(n + 1)
  end
  foo(1)
}


@@ -1,4 +1,3 @@
//use crate::asm::x86_64::*;
use crate::asm::*;
use crate::backend::ir::*;
use crate::codegen::*;
@@ -17,6 +16,7 @@ use std::mem;
use std::rc::{Rc};
use YARVOpnd::*;
use TempMapping::*;
use crate::invariants::block_assumptions_free;
// Maximum number of temp value types we keep track of
pub const MAX_TEMP_TYPES: usize = 8;
@@ -492,6 +492,10 @@ pub struct IseqPayload {
// Indexes of code pages used by this ISEQ
pub pages: HashSet<usize>,
// Blocks that are invalidated but are not yet deallocated.
// The code GC will free them later.
pub dead_blocks: Vec<BlockRef>,
}
impl IseqPayload {
@@ -599,8 +603,6 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
}
};
use crate::invariants;
// Take ownership of the payload with Box::from_raw().
// It drops right before this function returns.
// SAFETY: We got the pointer from Box::into_raw().
@@ -609,10 +611,10 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
// Increment the freed iseq count
incr_counter!(freed_iseq_count);
    // Remove all blocks in the payload from global invariants table.
    // Free all blocks in the payload
    for versions in &payload.version_map {
        for block in versions {
            invariants::block_assumptions_free(&block);
            free_block(block);
        }
    }
}
@@ -1896,10 +1898,12 @@ fn set_branch_target(
// Generate an outlined stub that will call branch_stub_hit()
let stub_addr = ocb.get_write_ptr();
// Get a raw pointer to the branch while keeping the reference count alive
// Here clone increments the strong count by 1
// This means the branch stub owns its own reference to the branch
// Get a raw pointer to the branch. We clone and then decrement the strong count which overall
// balances the strong count. We do this so that we're passing the result of [Rc::into_raw] to
// [Rc::from_raw] as required.
// We make sure the block housing the branch is still alive when branch_stub_hit() is running.
let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone());
unsafe { BranchRef::decrement_strong_count(branch_ptr) };
let mut asm = Assembler::new();
asm.comment("branch stub hit");
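
The clone-then-decrement dance above keeps the net strong count unchanged while
still satisfying the rule that [Rc::from_raw] may only consume pointers produced
by [Rc::into_raw]. A self-contained sketch of the same pattern (the `Payload`
type and `stub_hit` function are hypothetical placeholders, not YJIT's actual
branch_stub_hit() signature; the increment/from_raw pairing is just one way to
borrow through the raw pointer):

    use std::cell::RefCell;
    use std::rc::Rc;

    type Payload = RefCell<u32>; // stand-in for RefCell<Branch>

    fn make_raw(rc: &Rc<Payload>) -> *const Payload {
        // into_raw needs an owned Rc, so clone first...
        let raw = Rc::into_raw(Rc::clone(rc));
        // ...then give the clone's count straight back. Something else must
        // keep the value alive while the raw pointer is in use (in YJIT, the
        // block housing the branch).
        unsafe { Rc::decrement_strong_count(raw) };
        raw
    }

    fn stub_hit(raw: *const Payload) {
        // Re-add a count before from_raw so we don't consume one we don't own.
        unsafe { Rc::increment_strong_count(raw) };
        let rc = unsafe { Rc::from_raw(raw) };
        *rc.borrow_mut() += 1;
        // `rc` drops here and releases the count added above.
    }

    fn main() {
        let payload = Rc::new(RefCell::new(0u32));
        let raw = make_raw(&payload);
        stub_hit(raw);
        assert_eq!(*payload.borrow(), 1);
        assert_eq!(Rc::strong_count(&payload), 1); // net strong count unchanged
    }
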
@@ -2087,12 +2091,7 @@ pub fn defer_compilation(
asm.mark_branch_end(&branch_rc);
}
// Remove all references to a block then free it.
pub fn free_block(blockref: &BlockRef) {
use crate::invariants::*;
block_assumptions_free(blockref);
fn remove_from_graph(blockref: &BlockRef) {
let block = blockref.borrow();
// Remove this block from the predecessor's targets
@@ -2123,6 +2122,19 @@ pub fn free_block(blockref: &BlockRef) {
}
}
}
}
/// Remove most references to a block to deallocate it.
/// Does not touch references from iseq payloads.
pub fn free_block(blockref: &BlockRef) {
block_assumptions_free(blockref);
remove_from_graph(blockref);
// Branches have a Rc pointing at the block housing them.
// Break the cycle.
blockref.borrow_mut().incoming.clear();
blockref.borrow_mut().outgoing.clear();
// No explicit deallocation here as blocks are ref-counted.
}
@@ -2293,7 +2305,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// FIXME:
// Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
free_block(blockref);
delayed_deallocation(blockref);
ocb.unwrap().mark_all_executable();
cb.mark_all_executable();
@@ -2301,6 +2313,31 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
incr_counter!(invalidation_count);
}
// We cannot deallocate blocks immediately after invalidation since there
// could be stubs waiting to access branch pointers. Return stubs can do
// this since patching the code for setting up return addresses does not
// affect old return addresses that are already set up to use potentially
// invalidated branch pointers. Example:
//   def foo(n)
//     if n == 2
//       return 1.times { Object.define_method(:foo) {} }
//     end
//
//     foo(n + 1)
//   end
//   p foo(1)
pub fn delayed_deallocation(blockref: &BlockRef) {
block_assumptions_free(blockref);
// We do this another time when we deem that it's safe
// to deallocate in case there is another Ractor waiting to acquire the
// VM lock inside branch_stub_hit().
remove_from_graph(blockref);
let payload = get_iseq_payload(blockref.borrow().blockid.iseq).unwrap();
payload.dead_blocks.push(blockref.clone());
}
#[cfg(test)]
mod tests {
use crate::core::*;
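
Taken together, the free_block()/delayed_deallocation() split above amounts to a
small "graveyard" scheme: sever a block's graph edges right away, but park the
last strong reference in dead_blocks until it is provably safe to drop it. A
rough standalone sketch of that idea (the types and methods here are simplified
stand-ins, not the real IseqPayload API):

    use std::cell::RefCell;
    use std::rc::Rc;

    type BlockRef = Rc<RefCell<Block>>;

    #[derive(Default)]
    struct Block {
        incoming: Vec<BlockRef>,
        outgoing: Vec<BlockRef>,
    }

    #[derive(Default)]
    struct IseqPayload {
        version_map: Vec<Vec<BlockRef>>,
        dead_blocks: Vec<BlockRef>,
    }

    impl IseqPayload {
        // Invalidation time: sever graph edges (breaking any cycles) but park
        // the block in dead_blocks instead of letting it drop right away.
        fn delayed_deallocation(&mut self, blockref: &BlockRef) {
            blockref.borrow_mut().incoming.clear();
            blockref.borrow_mut().outgoing.clear();
            self.dead_blocks.push(Rc::clone(blockref));
        }

        // Code GC / global invalidation time, for ISEQs that are definitely
        // not running: drop the parked references so the blocks deallocate.
        fn sweep_dead_blocks(&mut self) {
            self.dead_blocks.clear();
        }
    }

    fn main() {
        let mut payload = IseqPayload::default();
        let a: BlockRef = Rc::default();
        let b: BlockRef = Rc::default();
        a.borrow_mut().outgoing.push(Rc::clone(&b));
        b.borrow_mut().incoming.push(Rc::clone(&a));
        payload.version_map.push(vec![Rc::clone(&a), Rc::clone(&b)]);

        payload.delayed_deallocation(&a);
        payload.delayed_deallocation(&b);
        payload.version_map.clear(); // no longer valid branch targets
        payload.sweep_dead_blocks(); // later, once provably safe
        assert_eq!(Rc::strong_count(&a), 1); // only the local handles remain
        assert_eq!(Rc::strong_count(&b), 1);
    }
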


@@ -497,21 +497,28 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
    // Stop other ractors since we are going to patch machine code.
    with_vm_lock(src_loc!(), || {
        // Make it so all live block versions are no longer valid branch targets
        let mut on_stack_iseqs = HashSet::new();
        for_each_on_stack_iseq(|iseq| {
            on_stack_iseqs.insert(iseq);
        });
        for_each_iseq(|iseq| {
            if let Some(payload) = get_iseq_payload(iseq) {
                // C comment:
                //   Leaking the blocks for now since we might have situations where
                //   a different ractor is waiting for the VM lock in branch_stub_hit().
                //   If we free the block that ractor can wake up with a dangling block.
                //
                // Deviation: since we ref count the blocks now, we might actually
                //   deallocate them rather than leak them.
                //
                // Empty all blocks on the iseq so we don't compile new blocks that jump to the
                // invalidated region.
                let blocks = payload.take_all_blocks();
                for blockref in blocks {
                    block_assumptions_free(&blockref);
                if on_stack_iseqs.contains(&iseq) {
                    // This ISEQ is running, so we can't free blocks immediately
                    for block in blocks {
                        delayed_deallocation(&block);
                    }
                    payload.dead_blocks.shrink_to_fit();
                } else {
                    // Safe to free dead blocks since the ISEQ isn't running
                    for block in blocks {
                        free_block(&block);
                    }
                    mem::take(&mut payload.dead_blocks)
                        .iter()
                        .for_each(free_block);
                }
            }
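
As a rough sketch of the control flow above, with for_each_on_stack_iseq,
for_each_iseq, and the two free paths abstracted into hypothetical callbacks
(IseqId stands in for IseqPtr):

    use std::collections::HashSet;

    type IseqId = usize;

    fn invalidate_all(
        all_iseqs: &[IseqId],
        on_stack: impl IntoIterator<Item = IseqId>,
        mut free_now: impl FnMut(IseqId),
        mut free_later: impl FnMut(IseqId),
    ) {
        // Snapshot which ISEQs currently have frames on some stack.
        let on_stack_iseqs: HashSet<IseqId> = on_stack.into_iter().collect();

        for &iseq in all_iseqs {
            if on_stack_iseqs.contains(&iseq) {
                // Running frames may still reach stubs holding branch pointers
                // into this ISEQ's blocks, so only defer deallocation.
                free_later(iseq);
            } else {
                // Nothing is executing this ISEQ: free immediately, including
                // anything that was previously deferred.
                free_now(iseq);
            }
        }
    }

    fn main() {
        invalidate_all(
            &[1usize, 2, 3],
            [2usize],
            |iseq| println!("free now:   iseq {iseq}"),
            |iseq| println!("free later: iseq {iseq}"),
        );
    }

The only input to the decision is whether the ISEQ has a frame on some stack;
everything else is deferred bookkeeping carried by dead_blocks.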