YJIT: GC and recompile all code pages (#6406)

when it fails to allocate a new page.

Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
This commit is contained in:
Takashi Kokubun 2022-10-25 09:07:10 -07:00 коммит произвёл GitHub
Родитель 1d2d25dcad
Коммит b7644a2311
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 454 добавлений и 32 удалений

2
cont.c
Просмотреть файл

@ -69,7 +69,7 @@ static VALUE rb_cFiberPool;
#define FIBER_POOL_ALLOCATION_FREE
#endif
#define jit_cont_enabled mjit_enabled // To be used by YJIT later
#define jit_cont_enabled (mjit_enabled || rb_yjit_enabled_p())
enum context_type {
CONTINUATION_CONTEXT = 0,

Просмотреть файл

@ -825,12 +825,126 @@ class TestYJIT < Test::Unit::TestCase
RUBY
end
# Trigger code GC twice and make sure the JIT can still compile code after each run.
# The script returns :ok only if pages could be added beforehand, both post-GC
# compilations succeed, and the code_gc_count stat (when it is available) equals 2.
def test_code_gc
assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
return :not_paged unless add_pages(100) # prepare freeable pages
code_gc # first code GC
return :not_compiled1 unless compiles { nil } # should be JITable again
code_gc # second code GC
return :not_compiled2 unless compiles { nil } # should be JITable again
code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 2
:ok
RUBY
end
# Code that is still on a fiber's stack must survive code GC.
# A fiber is JIT-compiled and suspended, code GC runs once, and the fiber is then
# resumed again; it must still return 0. The code_gc_count stat (when available)
# is expected to be exactly 1.
def test_on_stack_code_gc_call
assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
fiber = Fiber.new {
# Loop to call the same basic block again after Fiber.yield
while true
Fiber.yield(nil.to_i)
end
}
return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
return :broken_resume1 if fiber.resume != 0 # JIT the fiber
code_gc # first code GC, which should not free the fiber page
return :broken_resume2 if fiber.resume != 0 # The code should be still callable
code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 1
:ok
RUBY
end
# On-stack code must survive two consecutive code GCs even when the fiber is not
# resumed in between, and its page may only be freed once the fiber has terminated.
# The script runs three code GCs in total and expects the code_gc_count stat
# (when available) to be exactly 3.
def test_on_stack_code_gc_twice
assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
fiber = Fiber.new {
# Loop to call the same basic block again after Fiber.yield
while Fiber.yield(nil.to_i); end
}
return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
return :broken_resume1 if fiber.resume(true) != 0 # JIT the fiber
code_gc # first code GC, which should not free the fiber page
return :not_paged2 unless add_pages(300) # add some stuff to be freed
# Not calling fiber.resume here to test the case that the YJIT payload loses some
# information at the previous code GC. The payload should still be there, and
# thus we could know the fiber ISEQ is still on stack on this second code GC.
code_gc # second code GC, which should still not free the fiber page
return :not_paged3 unless add_pages(200) # attempt to overwrite the fiber page (it shouldn't)
return :broken_resume2 if fiber.resume(true) != 0 # The fiber code should be still fine
return :broken_resume3 if fiber.resume(false) != nil # terminate the fiber
code_gc # third code GC, freeing a page that used to be on stack
return :not_paged4 unless add_pages(100) # check everything still works
code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 3
:ok
RUBY
end
# Let code GC kick in on its own instead of forcing it with the code_gc helper.
# The script runs with mem_size: 1 (forwarded as --yjit-exec-mem-size) and JITs a
# large number of snippets while a suspended fiber keeps some code on stack.
# The fiber must stay callable, and the code_gc_count stat (when available)
# must not be 0.
def test_code_gc_with_many_iseqs
assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok, mem_size: 1)
fiber = Fiber.new {
# Loop to call the same basic block again after Fiber.yield
while true
Fiber.yield(nil.to_i)
end
}
return :not_paged1 unless add_pages(500) # use some pages
return :broken_resume1 if fiber.resume != 0 # leave an on-stack code as well
add_pages(2000) # use a whole lot of pages to run out of 1MiB
return :broken_resume2 if fiber.resume != 0 # on-stack code should be callable
code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count == 0
:ok
RUBY
end
private
# Ruby source that the code GC tests prepend to their scripts. It defines:
# - compiles(&block): calls the block and reports whether the compilation_failure
#   stat stayed the same.
# - add_pages(num_jits): evals a fresh snippet num_jits times, returning false as
#   soon as one fails to compile; otherwise true when compiled_page_count grew
#   (or when that stat is not available).
# - code_gc: calls RubyVM::YJIT.simulate_oom! and then evals new code so that the
#   next compilation triggers code GC.
def code_gc_helpers
<<~'RUBY'
def compiles(&block)
failures = RubyVM::YJIT.runtime_stats[:compilation_failure]
block.call
failures == RubyVM::YJIT.runtime_stats[:compilation_failure]
end
def add_pages(num_jits)
pages = RubyVM::YJIT.runtime_stats[:compiled_page_count]
num_jits.times { return false unless eval('compiles { nil.to_i }') }
pages.nil? || pages < RubyVM::YJIT.runtime_stats[:compiled_page_count]
end
def code_gc
RubyVM::YJIT.simulate_oom! # bump write_pos
eval('proc { nil }.call') # trigger code GC
end
RUBY
end
# Shorthand for assert_compiles(script) with all of its default options
# (in particular the default `exits: {}`).
def assert_no_exits(script)
assert_compiles(script)
end
ANY = Object.new
def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil)
def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil, mem_size: nil)
reset_stats = <<~RUBY
RubyVM::YJIT.runtime_stats
RubyVM::YJIT.reset_stats!
@ -864,7 +978,7 @@ class TestYJIT < Test::Unit::TestCase
#{write_results}
RUBY
status, out, err, stats = eval_with_jit(script, call_threshold: call_threshold)
status, out, err, stats = eval_with_jit(script, call_threshold:, mem_size:)
assert status.success?, "exited with status #{status.to_i}, stderr:\n#{err}"
@ -918,12 +1032,13 @@ class TestYJIT < Test::Unit::TestCase
s.chars.map { |c| c.ascii_only? ? c : "\\u%x" % c.codepoints[0] }.join
end
def eval_with_jit(script, call_threshold: 1, timeout: 1000)
def eval_with_jit(script, call_threshold: 1, timeout: 1000, mem_size: nil)
args = [
"--disable-gems",
"--yjit-call-threshold=#{call_threshold}",
"--yjit-stats"
]
args << "--yjit-exec-mem-size=#{mem_size}" if mem_size
args << "-e" << script_shell_encode(script)
stats_r, stats_w = IO.pipe
out, err, status = EnvUtil.invoke_ruby(args,

23
yjit.c
Просмотреть файл

@ -27,6 +27,7 @@
#include "probes_helper.h"
#include "iseq.h"
#include "ruby/debug.h"
#include "internal/cont.h"
// For mmap(), sysconf()
#ifndef _WIN32
@ -65,10 +66,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
bool
rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
{
if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
return false;
}
return true;
return mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0;
}
void
@ -85,6 +83,20 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
}
}
// Free the specified memory block.
// The range is first handed back with madvise(MADV_DONTNEED) and then made
// inaccessible with mprotect(PROT_NONE).
// Returns true when the mprotect call succeeded. The return value of madvise
// is not checked.
bool
rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)
{
// On Linux, you need to use madvise MADV_DONTNEED to free memory.
// We might not need to call this on macOS, but it's not really documented.
// We generally prefer to do the same thing on both to ease testing too.
madvise(mem_block, mem_size, MADV_DONTNEED);
// On macOS, mprotect PROT_NONE seems to reduce RSS.
// We also call this on Linux to avoid executing unused pages.
return mprotect(mem_block, mem_size, PROT_NONE) == 0;
}
// `start` is inclusive and `end` is exclusive.
void
rb_yjit_icache_invalidate(void *start, void *end)
@ -387,6 +399,9 @@ rb_iseq_reset_jit_func(const rb_iseq_t *iseq)
{
RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
iseq->body->jit_func = NULL;
// Enable re-compiling this ISEQ. Even when it's invalidated for TracePoint,
// we'd like to re-compile ISEQs that haven't been converted to trace_* insns.
iseq->body->total_calls = 0;
}
// Get the PC for a given index in an iseq

Просмотреть файл

@ -212,13 +212,17 @@ module RubyVM::YJIT
$stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
$stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
$stderr.puts "compilation_failure: " + ("%10d" % compilation_failure) if compilation_failure != 0
$stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
$stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
$stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
$stderr.puts "compiled_page_count: " + ("%10d" % stats[:compiled_page_count])
$stderr.puts "freed_iseq_count: " + ("%10d" % stats[:freed_iseq_count])
$stderr.puts "freed_page_count: " + ("%10d" % stats[:freed_page_count])
$stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
$stderr.puts "constant_state_bumps: " + ("%10d" % stats[:constant_state_bumps])
$stderr.puts "inline_code_size: " + ("%10d" % stats[:inline_code_size])
$stderr.puts "outlined_code_size: " + ("%10d" % stats[:outlined_code_size])
$stderr.puts "freed_code_size: " + ("%10d" % stats[:freed_code_size])
$stderr.puts "code_gc_count: " + ("%10d" % stats[:code_gc_count])
$stderr.puts "num_gc_obj_refs: " + ("%10d" % stats[:num_gc_obj_refs])
$stderr.puts "total_exit_count: " + ("%10d" % total_exits)

Просмотреть файл

@ -263,6 +263,7 @@ fn main() {
.allowlist_function("rb_yjit_reserve_addr_space")
.allowlist_function("rb_yjit_mark_writable")
.allowlist_function("rb_yjit_mark_executable")
.allowlist_function("rb_yjit_mark_unused")
.allowlist_function("rb_yjit_get_page_size")
.allowlist_function("rb_leaf_invokebuiltin_iseq_p")
.allowlist_function("rb_leaf_builtin_function")
@ -297,6 +298,9 @@ fn main() {
// From internal/compile.h
.allowlist_function("rb_vm_insn_decode")
// from internal/cont.h
.allowlist_function("rb_jit_cont_each_iseq")
// From iseq.h
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")

Просмотреть файл

@ -6,6 +6,9 @@ use std::rc::Rc;
use crate::backend::x86_64::JMP_PTR_BYTES;
#[cfg(target_arch = "aarch64")]
use crate::backend::arm64::JMP_PTR_BYTES;
use crate::core::for_each_on_stack_iseq_payload;
use crate::invariants::rb_yjit_tracing_invalidate_all;
use crate::stats::incr_counter;
use crate::virtualmem::WriteError;
#[cfg(feature = "disasm")]
@ -115,17 +118,23 @@ impl CodeBlock {
pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
let old_write_ptr = self.get_write_ptr();
self.set_write_ptr(base_ptr);
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
// Use the freed_pages list if code GC has been used. Otherwise use the next page.
let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
let current_page = self.write_pos / self.page_size;
freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
} else {
Some(self.write_pos / self.page_size + 1)
};
// Move self to the next page
let next_page_idx = self.write_pos / self.page_size + 1;
if !self.set_page(next_page_idx, &jmp_ptr) {
if next_page_idx.is_none() || !self.set_page(next_page_idx.unwrap(), &jmp_ptr) {
self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
return false;
}
// Move the other CodeBlock to the same page if it's on the furthest page
self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
return !self.dropped_bytes;
}
@ -151,7 +160,7 @@ impl CodeBlock {
// We could remember the last write_pos in page2 and let set_page use that position,
// but you need to waste some space for keeping write_pos for every single page.
// It doesn't seem necessary for performance either. So we're currently not doing it.
let dst_pos = self.page_size * page_idx + self.page_start();
let dst_pos = self.get_page_pos(page_idx);
if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
// Reset dropped_bytes
self.dropped_bytes = false;
@ -161,6 +170,7 @@ impl CodeBlock {
self.write_pos = dst_pos;
let dst_ptr = self.get_write_ptr();
self.write_pos = src_pos;
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
// Generate jmp_ptr from src_pos to dst_pos
self.without_page_end_reserve(|cb| {
@ -175,6 +185,53 @@ impl CodeBlock {
!self.dropped_bytes
}
/// Free the memory pages of the given code page indexes.
///
/// Runs of adjacent indexes (each one exactly one greater than the previous) are
/// released with a single `free_bytes` call. Indexes that are not adjacent in the
/// slice are simply freed in separate calls.
fn free_pages(&mut self, page_idxs: &[usize]) {
    let mut remaining = page_idxs.iter().copied().peekable();
    // Group adjacent page indexes and free them in batches to reduce the # of syscalls.
    while let Some(first_idx) = remaining.next() {
        // Extend the batch for as long as the next index directly follows the last one.
        let mut last_idx = first_idx;
        while remaining.next_if_eq(&(last_idx + 1)).is_some() {
            last_idx += 1;
        }

        // Free the grouped pages at once
        let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(first_idx * self.page_size);
        let batch_size = self.page_size * (last_idx - first_idx + 1);
        // NOTE(review): `as u32` truncates if a batch ever exceeds u32::MAX bytes --
        // presumably impossible for the code region; confirm.
        self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
    }
}
/// Size of one code page of this CodeBlock, i.e. the unit that page indexes are
/// multiplied by to get byte offsets.
pub fn page_size(&self) -> usize {
self.page_size
}
/// Return the number of code pages that have been allocated by the VirtualMemory.
///
/// The mapped region size is rounded up to whole code pages. Returns 0 when
/// nothing has been mapped yet.
pub fn num_pages(&self) -> usize {
    let mapped_region_size = self.mem_block.borrow().mapped_region_size();
    // Without this guard, `mapped_region_size - 1` below would underflow.
    if mapped_region_size == 0 {
        return 0;
    }
    // CodeBlock's page size != VirtualMem's page size on Linux,
    // so mapped_region_size % self.page_size may not be 0
    ((mapped_region_size - 1) / self.page_size) + 1
}
/// Count the allocated code pages that code GC has freed and that have not been
/// written to again yet.
pub fn num_freed_pages(&self) -> usize {
    let mut freed_count = 0;
    for page_idx in 0..self.num_pages() {
        if self.has_freed_page(page_idx) {
            freed_count += 1;
        }
    }
    freed_count
}
/// Whether `page_idx` is in the list of pages freed by code GC and this
/// CodeBlock's write_pos has not reached that page yet.
pub fn has_freed_page(&self, page_idx: usize) -> bool {
    // None means code GC has never run, so no page can be a freed one.
    let code_gced = match CodegenGlobals::get_freed_pages() {
        Some(pages) => pages.contains(&page_idx),
        None => false,
    };
    // code GCed and not written yet
    code_gced && self.write_pos < page_idx * self.page_size
}
/// Translate a page index into the write_pos at which writing to that page starts.
fn get_page_pos(&self, page_idx: usize) -> usize {
    let page_base = self.page_size * page_idx;
    page_base + self.page_start()
}
/// write_pos of the current page start
pub fn page_start_pos(&self) -> usize {
self.get_write_pos() / self.page_size * self.page_size + self.page_start()
@ -216,21 +273,48 @@ impl CodeBlock {
/// Return the address ranges of a given address range that this CodeBlock can write.
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
let mut addrs = vec![];
let mut start = start_ptr.into_usize();
// CodegenGlobals is not initialized when we write initial ocb code
let freed_pages = if CodegenGlobals::has_instance() {
CodegenGlobals::get_freed_pages().as_ref()
} else {
None
};
let region_start = self.get_ptr(0).into_usize();
let region_end = self.get_ptr(self.get_mem_size()).into_usize();
let mut start = start_ptr.into_usize();
let end = std::cmp::min(end_ptr.into_usize(), region_end);
let mut addrs = vec![];
while start < end {
let current_page = region_start +
(start.saturating_sub(region_start) / self.page_size * self.page_size);
let page_idx = start.saturating_sub(region_start) / self.page_size;
let current_page = region_start + (page_idx * self.page_size);
let page_end = std::cmp::min(end, current_page + self.page_end());
// If code GC has been used, skip pages that are used by past on-stack code
if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) {
addrs.push((start, page_end));
}
start = current_page + self.page_size + self.page_start();
}
addrs
}
/// Return the code size that has been used by this CodeBlock.
///
/// Sums a per-page contribution over every allocated page: the bytes written so
/// far for the page holding write_pos, nothing for freed pages, and the full
/// page range for every other page.
pub fn code_size(&self) -> usize {
    let current_page_idx = self.write_pos / self.page_size;
    (0..self.num_pages())
        .map(|page_idx| {
            if page_idx == current_page_idx {
                // Count only actually used bytes for the current page.
                (self.write_pos % self.page_size).saturating_sub(self.page_start())
            } else if self.has_freed_page(page_idx) {
                // Freed and not yet reused: contributes nothing.
                0
            } else {
                // Count an entire range for any non-freed pages that have been used.
                self.page_end() - self.page_start() + self.page_end_reserve
            }
        })
        .sum()
}
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
let page_offset = self.write_pos % self.page_size;
@ -261,6 +345,11 @@ impl CodeBlock {
self.asm_comments.get(&pos)
}
/// Remove all entries from `asm_comments`.
/// The clearing statement is only compiled in when the `disasm` feature is
/// enabled; otherwise this function does nothing.
pub fn clear_comments(&mut self) {
#[cfg(feature = "disasm")]
self.asm_comments.clear();
}
/// Return the `mem_size` of this CodeBlock, the upper bound that page positions
/// are checked against when switching pages.
pub fn get_mem_size(&self) -> usize {
self.mem_size
}
@ -293,6 +382,24 @@ impl CodeBlock {
self.mem_block.borrow().start_ptr().add_bytes(offset)
}
/// Convert an address range to memory page indexes against a num_pages()-sized array.
///
/// `start_addr` is inclusive and `end_addr` is exclusive. Both must lie within the
/// mapped region and be ordered, otherwise this panics. An empty range yields an
/// empty Vec.
pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
    let mem_start = self.mem_block.borrow().start_ptr().into_usize();
    let mem_end = self.mem_block.borrow().end_ptr().into_usize();
    assert!(mem_start <= start_addr.into_usize());
    assert!(start_addr.into_usize() <= end_addr.into_usize());
    assert!(end_addr.into_usize() <= mem_end);

    // Ignore empty code ranges
    if start_addr == end_addr {
        return vec![];
    }

    // Page index of an absolute address inside the region.
    let page_of = |addr: usize| (addr - mem_start) / self.page_size;
    let first_page = page_of(start_addr.into_usize());
    // end_addr is exclusive, so the last covered byte is the one right before it.
    let last_page = page_of(end_addr.into_usize() - 1);
    (first_page..=last_page).collect() // TODO: consider returning an iterator
}
/// Get a (possibly dangling) direct pointer to the current write position
pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
@ -431,6 +538,58 @@ impl CodeBlock {
self.mem_block.borrow_mut().mark_all_executable();
}
/// Code GC. Free code pages that are not on stack and reuse them.
///
/// Steps, in order:
/// 1. Bail out if the previous code GC recorded an empty freed-page list.
/// 2. Mark the pages recorded in on-stack ISEQ payloads and the pages of the
///    initial outlined code as in use.
/// 3. Free every other allocated page.
/// 4. Invalidate all code and reset the frozen-bytes marker.
/// 5. Move both the inline and the outlined CodeBlock to the first freed page.
/// 6. Store the freed-page list and bump the `code_gc_count` counter.
pub fn code_gc(&mut self) {
// The previous code GC failed to free any pages. Give up.
if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
return;
}
// Check which pages are still in use
let mut pages_in_use = vec![false; self.num_pages()];
// For each ISEQ, we currently assume that only code pages used by inline code
// are used by outlined code, so we mark only code pages used by inlined code.
for_each_on_stack_iseq_payload(|iseq_payload| {
for page in &iseq_payload.pages {
pages_in_use[*page] = true;
}
});
// Outlined code generated by CodegenGlobals::init() should also be kept.
for page in CodegenGlobals::get_ocb_pages() {
pages_in_use[*page] = true;
}
// Let VirtualMem free the pages
// (freed_pages is collected in ascending page-index order)
let freed_pages: Vec<usize> = pages_in_use.iter().enumerate()
.filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect();
self.free_pages(&freed_pages);
// Invalidate everything to have more compact code after code GC.
// This currently patches every ISEQ, which works, but in the future,
// we could limit that to patch only on-stack ISEQs for optimizing code GC.
rb_yjit_tracing_invalidate_all();
// When code GC runs next time, we could have reused pages in between
// invalidated pages. To invalidate them, we skip freezing them here.
// We free or not reuse the bytes frozen by any past invalidation, so this
// can be safely reset to pass the frozen bytes check on invalidation.
CodegenGlobals::set_inline_frozen_bytes(0);
// Restart writing from the first freed page, if any page was freed at all.
if let Some(&first_page) = freed_pages.first() {
let mut cb = CodegenGlobals::get_inline_cb();
cb.write_pos = cb.get_page_pos(first_page);
cb.dropped_bytes = false;
cb.clear_comments();
let mut ocb = CodegenGlobals::get_outlined_cb().unwrap();
ocb.write_pos = ocb.get_page_pos(first_page);
ocb.dropped_bytes = false;
ocb.clear_comments();
}
// An empty list is stored as Some(vec![]), which makes the next call give up early.
CodegenGlobals::set_freed_pages(freed_pages);
incr_counter!(code_gc_count);
}
/// True when this CodeBlock is not marked as outlined.
pub fn inline(&self) -> bool {
!self.outlined
}

Просмотреть файл

@ -643,6 +643,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
if cb.has_dropped_bytes() {
None
} else {
// Mark code pages for code GC
let iseq_payload = get_or_create_iseq_payload(iseq);
for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) {
iseq_payload.pages.insert(page);
}
Some(code_ptr)
}
}
@ -6504,6 +6509,12 @@ pub struct CodegenGlobals {
// Methods for generating code for hardcoded (usually C) methods
method_codegen_table: HashMap<usize, MethodGenFn>,
/// Page indexes for outlined code that are not associated to any ISEQ.
ocb_pages: Vec<usize>,
/// Freed page indexes. None if code GC has not been used.
freed_pages: Option<Vec<usize>>,
}
/// For implementing global code invalidation. A position in the inline
@ -6570,6 +6581,7 @@ impl CodegenGlobals {
#[cfg(test)]
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
let ocb_start_addr = ocb.unwrap().get_write_ptr();
let leave_exit_code = gen_leave_exit(&mut ocb);
let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
@ -6577,6 +6589,9 @@ impl CodegenGlobals {
// Generate full exit code for C func
let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
let ocb_end_addr = ocb.unwrap().get_write_ptr();
let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr);
// Mark all code memory as executable
cb.mark_all_executable();
ocb.unwrap().mark_all_executable();
@ -6590,6 +6605,8 @@ impl CodegenGlobals {
global_inval_patches: Vec::new(),
inline_frozen_bytes: 0,
method_codegen_table: HashMap::new(),
ocb_pages,
freed_pages: None,
};
// Register the method codegen functions
@ -6725,6 +6742,18 @@ impl CodegenGlobals {
Some(&mgf) => Some(mgf), // Deref
}
}
/// Page indexes of the outlined code that was recorded when the globals were
/// set up and is not associated with any ISEQ.
pub fn get_ocb_pages() -> &'static Vec<usize> {
&CodegenGlobals::get_instance().ocb_pages
}
/// Mutable access to the freed page indexes. The value is None if code GC has
/// not been used.
pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
&mut CodegenGlobals::get_instance().freed_pages
}
/// Record the given page indexes as the current freed-page list, replacing any
/// previous value. The list is always stored as Some, even when it is empty.
pub fn set_freed_pages(freed_pages: Vec<usize>) {
CodegenGlobals::get_instance().freed_pages = Some(freed_pages)
}
}
#[cfg(test)]

Просмотреть файл

@ -11,6 +11,7 @@ use crate::utils::*;
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::mem;
use std::rc::{Rc};
@ -321,7 +322,7 @@ struct Branch {
// Positions where the generated code starts and ends
start_addr: Option<CodePtr>,
end_addr: Option<CodePtr>,
end_addr: Option<CodePtr>, // exclusive
// Context right after the branch instruction
#[allow(unused)] // set but not read at the moment
@ -475,7 +476,11 @@ impl Eq for BlockRef {}
/// when calling into YJIT
#[derive(Default)]
pub struct IseqPayload {
// Basic block versions
version_map: VersionMap,
// Indexes of code pages used by this ISEQ.
// Code GC treats the pages listed here as in use while the ISEQ is on stack.
pub pages: HashSet<usize>,
}
impl IseqPayload {
@ -498,7 +503,7 @@ pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
}
/// Get the payload object associated with an iseq. Create one if none exists.
fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
type VoidPtr = *mut c_void;
let payload_non_null = unsafe {
@ -537,6 +542,21 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
/// Iterate over all on-stack ISEQ payloads
///
/// Hands `callback` to `rb_jit_cont_each_iseq` through a C-compatible wrapper and
/// calls it for every reported ISEQ that has a payload. ISEQs without a payload
/// are skipped.
#[cfg(not(test))]
pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
    unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
        // `data` is the address of the `&mut dyn FnMut(&IseqPayload)` local below, so it
        // must be reinterpreted as exactly that type (the closure returns nothing).
        let callback: &mut &mut dyn FnMut(&IseqPayload) = std::mem::transmute(&mut *data);
        if let Some(iseq_payload) = get_iseq_payload(iseq) {
            callback(iseq_payload);
        }
    }
    // A trait object reference is a fat pointer, so pass a thin pointer to it instead.
    let mut data: &mut dyn FnMut(&IseqPayload) = &mut callback;
    unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
/// Test-build stand-in for the real iterator: it never invokes the callback.
#[cfg(test)]
pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut _callback: F) {}
/// Free the per-iseq payload
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
@ -854,6 +874,12 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
}
incr_counter!(compiled_block_count);
// Mark code pages for code GC
let iseq_payload = get_iseq_payload(block.blockid.iseq).unwrap();
for page in cb.addrs_to_pages(block.start_addr.unwrap(), block.end_addr.unwrap()) {
iseq_payload.pages.insert(page);
}
}
/// Remove a block version from the version map of its parent ISEQ
@ -1526,7 +1552,11 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
match block {
// Compilation failed
None => return None,
None => {
// Trigger code GC. This entry point will be recompiled later.
cb.code_gc();
return None;
}
// If the block contains no Ruby instructions
Some(block) => {
@ -1776,6 +1806,18 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
block_rc.borrow().start_addr.unwrap()
}
None => {
// Code GC needs to borrow blocks for invalidation, so their mutable
// borrows must be dropped first.
drop(block);
drop(branch);
// Trigger code GC. The whole ISEQ will be recompiled later.
// We shouldn't trigger it in the middle of compilation in branch_stub_hit
// because incomplete code could be used when cb.dropped_bytes is flipped
// by code GC. So this place, after all compilation, is the safest place
// to hook code GC on branch_stub_hit.
cb.code_gc();
branch = branch_rc.borrow_mut();
// Failed to service the stub by generating a new block so now we
// need to exit to the interpreter at the stubbed location. We are
// intentionally *not* restoring original_interp_sp. At the time of
@ -1793,7 +1835,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
let new_branch_size = branch.code_size();
assert!(
new_branch_size <= branch_size_on_entry,
"branch stubs should never enlarge branches"
"branch stubs should never enlarge branches: (old_size: {}, new_size: {})",
branch_size_on_entry, new_branch_size,
);
// Return a pointer to the compiled block version
@ -1904,7 +1947,10 @@ pub fn gen_branch(
// Get the branch targets or stubs
let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
let dst_addr1 = if let Some(ctx) = ctx1 {
get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb)
match get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb) {
Some(dst_addr) => Some(dst_addr),
None => return, // avoid unwrap() in gen_fn()
}
} else {
None
};

Просмотреть файл

@ -1278,12 +1278,18 @@ extern "C" {
lines: *mut ::std::os::raw::c_int,
) -> ::std::os::raw::c_int;
}
extern "C" {
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
}
extern "C" {
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
}
extern "C" {
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
}
extern "C" {
pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
}
extern "C" {
pub fn rb_yjit_icache_invalidate(
start: *mut ::std::os::raw::c_void,

Просмотреть файл

@ -91,7 +91,7 @@ macro_rules! get_option_ref {
// Unsafe is ok here because options are initialized
// once before any Ruby code executes
($option_name:ident) => {
unsafe { &(OPTIONS.$option_name) }
unsafe { &($crate::options::OPTIONS.$option_name) }
};
}
pub(crate) use get_option_ref;

Просмотреть файл

@ -253,6 +253,7 @@ make_counters! {
compiled_block_count,
compilation_failure,
freed_iseq_count,
code_gc_count,
exit_from_branch_stub,
@ -351,23 +352,37 @@ fn rb_yjit_gen_stats_dict() -> VALUE {
return Qnil;
}
macro_rules! hash_aset_usize {
($hash:ident, $counter_name:expr, $value:expr) => {
let key = rust_str_to_sym($counter_name);
let value = VALUE::fixnum_from_usize($value);
rb_hash_aset($hash, key, value);
}
}
let hash = unsafe { rb_hash_new() };
// Inline and outlined code size
// CodeBlock stats
unsafe {
// Get the inline and outlined code blocks
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
// Inline code size
let key = rust_str_to_sym("inline_code_size");
let value = VALUE::fixnum_from_usize(cb.get_write_pos());
rb_hash_aset(hash, key, value);
hash_aset_usize!(hash, "inline_code_size", cb.code_size());
// Outlined code size
let key = rust_str_to_sym("outlined_code_size");
let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
rb_hash_aset(hash, key, value);
hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
// GCed pages
let freed_page_count = cb.num_freed_pages();
hash_aset_usize!(hash, "freed_page_count", freed_page_count);
// GCed code size
hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
// Compiled pages
hash_aset_usize!(hash, "compiled_page_count", cb.num_pages() - freed_page_count);
}
// If we're not generating stats, the hash is done

Просмотреть файл

@ -51,6 +51,8 @@ pub trait Allocator {
fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
fn mark_executable(&mut self, ptr: *const u8, size: u32);
fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
}
/// Pointer into a [VirtualMemory].
@ -91,6 +93,15 @@ impl<A: Allocator> VirtualMemory<A> {
CodePtr(self.region_start)
}
/// Pointer just past the mapped part of the region, i.e. `region_start` advanced
/// by `mapped_region_bytes`.
pub fn end_ptr(&self) -> CodePtr {
CodePtr(self.region_start.wrapping_add(self.mapped_region_bytes))
}
/// Size of the region in bytes that we have allocated physical memory for.
pub fn mapped_region_size(&self) -> usize {
self.mapped_region_bytes
}
/// Size of the region in bytes where writes could be attempted.
pub fn virtual_region_size(&self) -> usize {
self.region_size_bytes
@ -177,6 +188,12 @@ impl<A: Allocator> VirtualMemory<A> {
// Make mapped region executable
self.allocator.mark_executable(region_start, mapped_region_bytes);
}
/// Free a range of bytes. start_ptr must be memory page-aligned.
///
/// Panics if `start_ptr` is not a multiple of `page_size_bytes`. The request is
/// forwarded to the allocator's `mark_unused`.
pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
assert_eq!(start_ptr.into_usize() % self.page_size_bytes, 0);
// NOTE(review): the bool returned by mark_unused is discarded, so a failed
// request goes unnoticed here -- confirm that this is intended.
self.allocator.mark_unused(start_ptr.0, size);
}
}
impl CodePtr {
@ -235,6 +252,10 @@ mod sys {
fn mark_executable(&mut self, ptr: *const u8, size: u32) {
unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
}
// Forward the request to the C function rb_yjit_mark_unused and return its result.
fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool {
unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) }
}
}
}
@ -258,6 +279,7 @@ pub mod tests {
enum AllocRequest {
MarkWritable{ start_idx: usize, length: usize },
MarkExecutable{ start_idx: usize, length: usize },
MarkUnused{ start_idx: usize, length: usize },
}
use AllocRequest::*;
@ -298,6 +320,13 @@ pub mod tests {
// We don't try to execute generated code in cfg(test)
// so no need to actually request executable memory.
}
// Test allocator: bounds-check the request, record it as a MarkUnused entry in
// `requests`, and always report success.
fn mark_unused(&mut self, ptr: *const u8, length: u32) -> bool {
let index = self.bounds_check_request(ptr, length);
self.requests.push(MarkUnused { start_idx: index, length: length.as_usize() });
true
}
}
// Fictional architecture where each page is 4 bytes long