Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
This commit is contained in:
Takashi Kokubun 2023-10-18 14:07:03 -07:00 коммит произвёл GitHub
Родитель 58ee088bb3
Коммит f51b92fe23
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 133 добавлений и 8 удалений

Просмотреть файл

@ -173,6 +173,7 @@ compiled, lower values mean less code is compiled (default 200000)
- `--yjit-trace-exits`: produce a Marshal dump of backtraces from specific exits. Automatically enables `--yjit-stats`
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)
- `--yjit-perf`: enable frame pointers and profiling with Linux perf
Note that there is also an environment variable `RUBY_YJIT_ENABLE` which can be used to enable YJIT.
This can be useful for some deployment scripts where specifying an extra command-line option to Ruby is not practical.
@ -428,3 +429,30 @@ While in your i386 shell, install Cargo and Homebrew, then hack away!
2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install
If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier.
## Profiling with Linux perf
`--yjit-perf` allows you to profile JIT-ed methods along with other native functions using Linux perf.
When you run Ruby with `perf record`, perf looks up `/tmp/perf-{pid}.map` to resolve symbols in JIT code,
and this option lets YJIT write method symbols into that file as well as enabling frame pointers.
Here's an example of how to use this option with [Firefox Profiler](https://profiler.firefox.com)
(See also: [Profiling with Linux perf](https://profiler.firefox.com/docs/#/./guide-perf-profiling)):
```bash
# Compile the interpreter with frame pointers enabled
./configure --enable-yjit --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc cflags=-fno-omit-frame-pointer
make -j && make install
# [Optional] Allow running perf without sudo
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
# Profile Ruby with --yjit-perf
cd ../yjit-bench
perf record --call-graph fp -- ruby --yjit-perf -Iharness-perf benchmarks/liquid-render/benchmark.rb
# View results on Firefox Profiler https://profiler.firefox.com.
# Create /tmp/test.perf as below and upload it using "Load a profile from file".
perf script --fields +pid > /tmp/test.perf
```

Просмотреть файл

@ -323,7 +323,6 @@ impl CodeBlock {
}
/// Return the address ranges of a given address range that this CodeBlock can write.
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
#[allow(dead_code)]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
let region_start = self.get_ptr(0).into_usize();

Просмотреть файл

@ -498,8 +498,21 @@ impl Assembler
cb.write_byte(0);
},
Insn::FrameSetup => {},
Insn::FrameTeardown => {},
// Set up RBP to work with frame pointer unwinding
// (e.g. with Linux `perf record --call-graph fp`)
Insn::FrameSetup => {
if get_option!(frame_pointer) {
push(cb, RBP);
mov(cb, RBP, RSP);
push(cb, RBP);
}
},
Insn::FrameTeardown => {
if get_option!(frame_pointer) {
pop(cb, RBP);
pop(cb, RBP);
}
},
Insn::Add { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);

Просмотреть файл

@ -21,6 +21,7 @@ use std::mem;
use std::os::raw::c_int;
use std::ptr;
use std::rc::Rc;
use std::cell::RefCell;
use std::slice;
pub use crate::virtualmem::CodePtr;
@ -97,6 +98,9 @@ pub struct JITState {
/// When true, the block is valid only when there is a total of one ractor running
pub block_assumes_single_ractor: bool,
/// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt)
perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>,
}
impl JITState {
@ -118,6 +122,7 @@ impl JITState {
bop_assumptions: vec![],
stable_constant_names_assumption: None,
block_assumes_single_ractor: false,
perf_map: Rc::default(),
}
}
@ -231,6 +236,40 @@ impl JITState {
pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) {
self.pending_outgoing.push(branch)
}
/// Mark the start address of a symbol to be reported to perf
fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) {
let symbol_name = symbol_name.to_string();
let syms = self.perf_map.clone();
asm.pos_marker(move |start| syms.borrow_mut().push((start, None, symbol_name.clone())));
}
/// Mark the end address of a symbol to be reported to perf
fn perf_symbol_range_end(&self, asm: &mut Assembler) {
let syms = self.perf_map.clone();
asm.pos_marker(move |end| {
if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() {
assert_eq!(None, *end_store);
*end_store = Some(end);
}
});
}
/// Flush addresses and symbols to /tmp/perf-{pid}.map
///
/// Appends one `{start:x} {size:x} {name}` line for every writable address
/// range of each recorded symbol. Symbols whose end address has not been
/// recorded are skipped.
///
/// # Panics
///
/// Panics if the map file cannot be opened, written, or flushed.
fn flush_perf_symbols(&self, cb: &CodeBlock) {
    use std::io::{BufWriter, Write};

    let path = format!("/tmp/perf-{}.map", std::process::id());
    let file = std::fs::File::options()
        .create(true)
        .append(true)
        .open(&path)
        .unwrap_or_else(|err| panic!("failed to open {path}: {err}"));
    // An unbuffered `File` turns every `writeln!` into at least one write
    // syscall; batch the lines and flush once at the end instead.
    let mut f = BufWriter::new(file);

    for sym in self.perf_map.borrow().iter() {
        if let (start, Some(end), name) = sym {
            // The range may be split into several writable pieces (e.g. when the
            // code straddles two pages); each piece is reported under the same name.
            for (inline_start, inline_end) in cb.writable_addrs(*start, *end) {
                let code_size = inline_end - inline_start;
                writeln!(f, "{inline_start:x} {code_size:x} {name}")
                    .unwrap_or_else(|err| panic!("failed to write {path}: {err}"));
            }
        }
    }

    // Dropping a `BufWriter` discards flush errors, so flush explicitly.
    f.flush()
        .unwrap_or_else(|err| panic!("failed to flush {path}: {err}"));
}
}
use crate::codegen::JCCKinds::*;
@ -883,6 +922,19 @@ pub fn gen_single_block(
asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
}
// Mark the start of a method name symbol for --yjit-perf
if get_option!(perf_map) {
let comptime_recv_class = jit.peek_at_self().class_of();
let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_class)) };
match (class_name, unsafe { rb_iseq_label(iseq) }) {
(Some(class_name), iseq_label) if iseq_label != Qnil => {
let iseq_label = ruby_str_to_rust(iseq_label);
jit.perf_symbol_range_start(&mut asm, &format!("[JIT] {}#{}", class_name, iseq_label));
}
_ => {},
}
}
if asm.ctx.is_return_landing() {
// Continuation of the end of gen_leave().
// Reload REG_SP for the current frame and transfer the return value
@ -1004,10 +1056,20 @@ pub fn gen_single_block(
asm.pad_inval_patch();
}
// Mark the end of a method name symbol for --yjit-perf
if get_option!(perf_map) {
jit.perf_symbol_range_end(&mut asm);
}
// Compile code into the code block
let gc_offsets = asm.compile(cb, Some(ocb));
let end_addr = cb.get_write_ptr();
// Flush perf symbols after asm.compile() writes addresses
if get_option!(perf_map) {
jit.flush_perf_symbols(cb);
}
// If code for the block doesn't fit, fail
if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
return Err(());
@ -8681,8 +8743,6 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
use std::cell::RefCell;
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming

Просмотреть файл

@ -577,7 +577,6 @@ pub fn rust_str_to_sym(str: &str) -> VALUE {
}
/// Produce an owned Rust String from a C char pointer
#[cfg(feature = "disasm")]
pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
assert!(c_char_ptr != std::ptr::null());

Просмотреть файл

@ -62,6 +62,12 @@ pub struct Options {
/// Verify context objects (debug mode only)
pub verify_ctx: bool,
/// Enable generating frame pointers (for x86. arm64 always does this)
pub frame_pointer: bool,
/// Enable writing /tmp/perf-{pid}.map for Linux perf
pub perf_map: bool,
}
// Initialize the options to default values
@ -80,10 +86,12 @@ pub static mut OPTIONS: Options = Options {
dump_disasm: None,
verify_ctx: false,
dump_iseq_disasm: None,
frame_pointer: false,
perf_map: false,
};
/// YJIT option descriptions for `ruby --help`.
static YJIT_OPTIONS: [(&str, &str); 8] = [
static YJIT_OPTIONS: [(&str, &str); 9] = [
("--yjit-stats", "Enable collecting YJIT statistics"),
("--yjit-trace-exits", "Record Ruby source location when exiting from generated code"),
("--yjit-trace-exits-sample-rate", "Trace exit locations only every Nth occurrence"),
@ -92,6 +100,7 @@ static YJIT_OPTIONS: [(&str, &str); 8] = [
("--yjit-cold-threshold=num", "Global call after which ISEQs not compiled (default: 200K)"),
("--yjit-max-versions=num", "Maximum number of versions per basic block (default: 4)"),
("--yjit-greedy-versioning", "Greedy versioning mode (default: disabled)"),
("--yjit-perf", "Enable frame pointers and perf profiling"),
];
#[derive(Clone, PartialEq, Eq, Debug)]
@ -191,6 +200,16 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
("perf", _) => match opt_val {
"" => unsafe {
OPTIONS.frame_pointer = true;
OPTIONS.perf_map = true;
},
"fp" => unsafe { OPTIONS.frame_pointer = true },
"map" => unsafe { OPTIONS.perf_map = true },
_ => return None,
},
("dump-disasm", _) => match opt_val {
"" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
directory => {

Просмотреть файл

@ -73,7 +73,7 @@ pub(crate) use offset_of;
// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
fn ruby_str_to_rust(v: VALUE) -> String {
pub fn ruby_str_to_rust(v: VALUE) -> String {
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };

Просмотреть файл

@ -72,6 +72,13 @@ pub extern "C" fn rb_yjit_init_rust() {
println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
std::process::abort();
}
// Make sure --yjit-perf doesn't append symbols to an old file
if get_option!(perf_map) {
let perf_map = format!("/tmp/perf-{}.map", std::process::id());
let _ = std::fs::remove_file(&perf_map);
println!("YJIT perf map: {perf_map}");
}
}
/// At the moment, we abort in all cases we panic.