зеркало из https://github.com/github/ruby.git
YJIT: Add --yjit-perf (#8697)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
This commit is contained in:
Родитель
58ee088bb3
Коммит
f51b92fe23
|
@ -173,6 +173,7 @@ compiled, lower values mean less code is compiled (default 200000)
|
|||
- `--yjit-trace-exits`: produce a Marshal dump of backtraces from specific exits. Automatically enables `--yjit-stats`
|
||||
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
|
||||
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)
|
||||
- `--yjit-perf`: enable frame pointers and perf profiling
|
||||
|
||||
Note that there is also an environment variable `RUBY_YJIT_ENABLE` which can be used to enable YJIT.
|
||||
This can be useful for some deployment scripts where specifying an extra command-line option to Ruby is not practical.
|
||||
|
@ -428,3 +429,30 @@ While in your i386 shell, install Cargo and Homebrew, then hack away!
|
|||
2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install
|
||||
|
||||
If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier.
|
||||
|
||||
## Profiling with Linux perf
|
||||
|
||||
`--yjit-perf` allows you to profile JIT-ed methods along with other native functions using Linux perf.
|
||||
When you run Ruby with `perf record`, perf looks up `/tmp/perf-{pid}.map` to resolve symbols in JIT code,
|
||||
and this option lets YJIT write method symbols into that file as well as enable frame pointers.
|
||||
|
||||
Here's an example of how to use this option with [Firefox Profiler](https://profiler.firefox.com)
|
||||
(See also: [Profiling with Linux perf](https://profiler.firefox.com/docs/#/./guide-perf-profiling)):
|
||||
|
||||
```bash
|
||||
# Compile the interpreter with frame pointers enabled
|
||||
./configure --enable-yjit --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc cflags=-fno-omit-frame-pointer
|
||||
make -j && make install
|
||||
|
||||
# [Optional] Allow running perf without sudo
|
||||
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
|
||||
echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
||||
|
||||
# Profile Ruby with --yjit-perf
|
||||
cd ../yjit-bench
|
||||
perf record --call-graph fp -- ruby --yjit-perf -Iharness-perf benchmarks/liquid-render/benchmark.rb
|
||||
|
||||
# View results on Firefox Profiler https://profiler.firefox.com.
|
||||
# Create /tmp/test.perf as below and upload it using "Load a profile from file".
|
||||
perf script --fields +pid > /tmp/test.perf
|
||||
```
|
||||
|
|
|
@ -323,7 +323,6 @@ impl CodeBlock {
|
|||
}
|
||||
|
||||
/// Return the address ranges of a given address range that this CodeBlock can write.
|
||||
#[cfg(any(feature = "disasm", target_arch = "aarch64"))]
|
||||
#[allow(dead_code)]
|
||||
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
|
||||
let region_start = self.get_ptr(0).into_usize();
|
||||
|
|
|
@ -498,8 +498,21 @@ impl Assembler
|
|||
cb.write_byte(0);
|
||||
},
|
||||
|
||||
Insn::FrameSetup => {},
|
||||
Insn::FrameTeardown => {},
|
||||
// Set up RBP to work with frame pointer unwinding
|
||||
// (e.g. with Linux `perf record --call-graph fp`)
|
||||
Insn::FrameSetup => {
|
||||
if get_option!(frame_pointer) {
|
||||
push(cb, RBP);
|
||||
mov(cb, RBP, RSP);
|
||||
push(cb, RBP);
|
||||
}
|
||||
},
|
||||
Insn::FrameTeardown => {
|
||||
if get_option!(frame_pointer) {
|
||||
pop(cb, RBP);
|
||||
pop(cb, RBP);
|
||||
}
|
||||
},
|
||||
|
||||
Insn::Add { left, right, .. } => {
|
||||
let opnd1 = emit_64bit_immediate(cb, right);
|
||||
|
|
|
@ -21,6 +21,7 @@ use std::mem;
|
|||
use std::os::raw::c_int;
|
||||
use std::ptr;
|
||||
use std::rc::Rc;
|
||||
use std::cell::RefCell;
|
||||
use std::slice;
|
||||
|
||||
pub use crate::virtualmem::CodePtr;
|
||||
|
@ -97,6 +98,9 @@ pub struct JITState {
|
|||
|
||||
/// When true, the block is valid only when there is a total of one ractor running
|
||||
pub block_assumes_single_ractor: bool,
|
||||
|
||||
/// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt)
|
||||
perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>,
|
||||
}
|
||||
|
||||
impl JITState {
|
||||
|
@ -118,6 +122,7 @@ impl JITState {
|
|||
bop_assumptions: vec![],
|
||||
stable_constant_names_assumption: None,
|
||||
block_assumes_single_ractor: false,
|
||||
perf_map: Rc::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,6 +236,40 @@ impl JITState {
|
|||
pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) {
|
||||
self.pending_outgoing.push(branch)
|
||||
}
|
||||
|
||||
/// Mark the start address of a symbol to be reported to perf
|
||||
fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) {
|
||||
let symbol_name = symbol_name.to_string();
|
||||
let syms = self.perf_map.clone();
|
||||
asm.pos_marker(move |start| syms.borrow_mut().push((start, None, symbol_name.clone())));
|
||||
}
|
||||
|
||||
/// Mark the end address of a symbol to be reported to perf
|
||||
fn perf_symbol_range_end(&self, asm: &mut Assembler) {
|
||||
let syms = self.perf_map.clone();
|
||||
asm.pos_marker(move |end| {
|
||||
if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() {
|
||||
assert_eq!(None, *end_store);
|
||||
*end_store = Some(end);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Flush addresses and symbols to /tmp/perf-{pid}.map
|
||||
fn flush_perf_symbols(&self, cb: &CodeBlock) {
|
||||
let path = format!("/tmp/perf-{}.map", std::process::id());
|
||||
let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap();
|
||||
for sym in self.perf_map.borrow().iter() {
|
||||
if let (start, Some(end), name) = sym {
|
||||
// In case the code straddles two pages, part of it belongs to the symbol.
|
||||
for (inline_start, inline_end) in cb.writable_addrs(*start, *end) {
|
||||
use std::io::Write;
|
||||
let code_size = inline_end - inline_start;
|
||||
writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::codegen::JCCKinds::*;
|
||||
|
@ -883,6 +922,19 @@ pub fn gen_single_block(
|
|||
asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
|
||||
}
|
||||
|
||||
// Mark the start of a method name symbol for --yjit-perf
|
||||
if get_option!(perf_map) {
|
||||
let comptime_recv_class = jit.peek_at_self().class_of();
|
||||
let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_class)) };
|
||||
match (class_name, unsafe { rb_iseq_label(iseq) }) {
|
||||
(Some(class_name), iseq_label) if iseq_label != Qnil => {
|
||||
let iseq_label = ruby_str_to_rust(iseq_label);
|
||||
jit.perf_symbol_range_start(&mut asm, &format!("[JIT] {}#{}", class_name, iseq_label));
|
||||
}
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
|
||||
if asm.ctx.is_return_landing() {
|
||||
// Continuation of the end of gen_leave().
|
||||
// Reload REG_SP for the current frame and transfer the return value
|
||||
|
@ -1004,10 +1056,20 @@ pub fn gen_single_block(
|
|||
asm.pad_inval_patch();
|
||||
}
|
||||
|
||||
// Mark the end of a method name symbol for --yjit-perf
|
||||
if get_option!(perf_map) {
|
||||
jit.perf_symbol_range_end(&mut asm);
|
||||
}
|
||||
|
||||
// Compile code into the code block
|
||||
let gc_offsets = asm.compile(cb, Some(ocb));
|
||||
let end_addr = cb.get_write_ptr();
|
||||
|
||||
// Flush perf symbols after asm.compile() writes addresses
|
||||
if get_option!(perf_map) {
|
||||
jit.flush_perf_symbols(cb);
|
||||
}
|
||||
|
||||
// If code for the block doesn't fit, fail
|
||||
if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
|
||||
return Err(());
|
||||
|
@ -8681,8 +8743,6 @@ impl CodegenGlobals {
|
|||
|
||||
#[cfg(not(test))]
|
||||
let (mut cb, mut ocb) = {
|
||||
use std::cell::RefCell;
|
||||
|
||||
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
|
||||
|
||||
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
|
||||
|
|
|
@ -577,7 +577,6 @@ pub fn rust_str_to_sym(str: &str) -> VALUE {
|
|||
}
|
||||
|
||||
/// Produce an owned Rust String from a C char pointer
|
||||
#[cfg(feature = "disasm")]
|
||||
pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
|
||||
assert!(c_char_ptr != std::ptr::null());
|
||||
|
||||
|
|
|
@ -62,6 +62,12 @@ pub struct Options {
|
|||
|
||||
/// Verify context objects (debug mode only)
|
||||
pub verify_ctx: bool,
|
||||
|
||||
/// Enable generating frame pointers (for x86; arm64 always does this)
|
||||
pub frame_pointer: bool,
|
||||
|
||||
/// Enable writing /tmp/perf-{pid}.map for Linux perf
|
||||
pub perf_map: bool,
|
||||
}
|
||||
|
||||
// Initialize the options to default values
|
||||
|
@ -80,10 +86,12 @@ pub static mut OPTIONS: Options = Options {
|
|||
dump_disasm: None,
|
||||
verify_ctx: false,
|
||||
dump_iseq_disasm: None,
|
||||
frame_pointer: false,
|
||||
perf_map: false,
|
||||
};
|
||||
|
||||
/// YJIT option descriptions for `ruby --help`.
|
||||
static YJIT_OPTIONS: [(&str, &str); 8] = [
|
||||
static YJIT_OPTIONS: [(&str, &str); 9] = [
|
||||
("--yjit-stats", "Enable collecting YJIT statistics"),
|
||||
("--yjit-trace-exits", "Record Ruby source location when exiting from generated code"),
|
||||
("--yjit-trace-exits-sample-rate", "Trace exit locations only every Nth occurrence"),
|
||||
|
@ -92,6 +100,7 @@ static YJIT_OPTIONS: [(&str, &str); 8] = [
|
|||
("--yjit-cold-threshold=num", "Global call after which ISEQs not compiled (default: 200K)"),
|
||||
("--yjit-max-versions=num", "Maximum number of versions per basic block (default: 4)"),
|
||||
("--yjit-greedy-versioning", "Greedy versioning mode (default: disabled)"),
|
||||
("--yjit-perf", "Enable frame pointers and perf profiling"),
|
||||
];
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
|
@ -191,6 +200,16 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
|
|||
}
|
||||
},
|
||||
|
||||
("perf", _) => match opt_val {
|
||||
"" => unsafe {
|
||||
OPTIONS.frame_pointer = true;
|
||||
OPTIONS.perf_map = true;
|
||||
},
|
||||
"fp" => unsafe { OPTIONS.frame_pointer = true },
|
||||
"map" => unsafe { OPTIONS.perf_map = true },
|
||||
_ => return None,
|
||||
},
|
||||
|
||||
("dump-disasm", _) => match opt_val {
|
||||
"" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
|
||||
directory => {
|
||||
|
|
|
@ -73,7 +73,7 @@ pub(crate) use offset_of;
|
|||
// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
|
||||
// This should work fine on ASCII strings and anything else
|
||||
// that is considered legal UTF-8, including embedded nulls.
|
||||
fn ruby_str_to_rust(v: VALUE) -> String {
|
||||
pub fn ruby_str_to_rust(v: VALUE) -> String {
|
||||
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
|
||||
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
|
||||
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
|
||||
|
|
|
@ -72,6 +72,13 @@ pub extern "C" fn rb_yjit_init_rust() {
|
|||
println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
|
||||
std::process::abort();
|
||||
}
|
||||
|
||||
// Make sure --yjit-perf doesn't append symbols to an old file
|
||||
if get_option!(perf_map) {
|
||||
let perf_map = format!("/tmp/perf-{}.map", std::process::id());
|
||||
let _ = std::fs::remove_file(&perf_map);
|
||||
println!("YJIT perf map: {perf_map}");
|
||||
}
|
||||
}
|
||||
|
||||
/// At the moment, we abort in all cases we panic.
|
||||
|
|
Загрузка…
Ссылка в новой задаче