RJIT: Implement --rjit-trace-exits

This commit is contained in:
Takashi Kokubun 2023-03-12 13:55:39 -07:00
Родитель bbd9221e46
Коммит 9cd5441d28
9 изменённых файлов: 336 добавлений и 8 удалений

Просмотреть файл

@ -6,12 +6,12 @@ module RubyVM::RJIT
# @param pc [Integer]
# @param asm [RubyVM::RJIT::Assembler]
def compile_entry_exit(pc, ctx, asm, cause:)
# Increment per-insn exit counter
incr_insn_exit(pc, asm)
# Fix pc/sp offsets for the interpreter
save_pc_and_sp(pc, ctx, asm, reset_sp_offset: false)
# Increment per-insn exit counter
count_insn_exit(pc, asm)
# Restore callee-saved registers
asm.comment("#{cause}: entry exit")
asm.pop(SP)
@ -62,12 +62,12 @@ module RubyVM::RJIT
# @param ctx [RubyVM::RJIT::Context]
# @param asm [RubyVM::RJIT::Assembler]
def compile_side_exit(pc, ctx, asm)
# Increment per-insn exit counter
incr_insn_exit(pc, asm)
# Fix pc/sp offsets for the interpreter
save_pc_and_sp(pc, ctx.dup, asm) # dup to avoid sp_offset update
# Increment per-insn exit counter
count_insn_exit(pc, asm)
# Restore callee-saved registers
asm.comment("exit to interpreter on #{pc_to_insn(pc).name}")
asm.pop(SP)
@ -105,13 +105,18 @@ module RubyVM::RJIT
# @param pc [Integer]
# @param asm [RubyVM::RJIT::Assembler]
def incr_insn_exit(pc, asm)
def count_insn_exit(pc, asm)
  # With stats enabled: emit code that bumps the exit counter slot of the
  # instruction located at this pc.
  if C.rjit_opts.stats
    exit_insn = Compiler.decode_insn(C.VALUE.new(pc).*)
    asm.comment("increment insn exit: #{exit_insn.name}")
    counter_addr = (C.rjit_insn_exits + exit_insn.bin).to_i
    asm.mov(:rax, counter_addr)
    asm.add([:rax], 1) # TODO: lock
  end

  # With trace_exits enabled: emit a call to rjit_record_exit_stack,
  # passing the exit pc as its first C argument.
  if C.rjit_opts.trace_exits
    asm.comment('rjit_record_exit_stack')
    asm.mov(C_ARGS[0], pc)
    asm.call(C.rjit_record_exit_stack)
  end
end
# @param jit [RubyVM::RJIT::JITState]

Просмотреть файл

@ -30,6 +30,7 @@ module RubyVM::RJIT
class << self
private
# --rjit-stats at_exit
def print_stats
stats = runtime_stats
$stderr.puts("***RJIT: Printing RJIT statistics on exit***")
@ -98,5 +99,87 @@ module RubyVM::RJIT
with_commas = d_groups.map(&:join).join(',').reverse
[with_commas, decimal].compact.join('.').rjust(pad, ' ')
end
# --rjit-trace-exits at_exit
#
# Marshal the result of exit_traces into rjit_exit_locations.dump under the
# current working directory and report the path on stderr.
def dump_trace_exits
  dump_path = "#{Dir.pwd}/rjit_exit_locations.dump"
  payload = Marshal.dump(exit_traces)
  File.binwrite(dump_path, payload)
  $stderr.puts("RJIT exit locations dumped to:\n#{dump_path}")
end
# Convert rb_rjit_raw_samples and rb_rjit_line_samples into a StackProf format.
#
# The raw data returned by C.rjit_exit_traces is a flat list of records:
#
#   Raw Samples:
#     [ length, frame1, frame2, frameN, ..., instruction, count
#
#   Line Samples:
#     [ length, line_1, line_2, line_n, ..., dummy value, count
#
# `count` is the number of times the very same stack was recorded, so every
# statistic derived from a record is weighted by it.
#
# @return [Hash] the hash from C.rjit_exit_traces (:raw, :lines, :frames) with
#   per-frame counters filled in and :samples, :missed_samples and
#   :gc_samples added.
def exit_traces
  results = C.rjit_exit_traces
  raw_samples = results[:raw].dup
  line_samples = results[:lines].dup
  # Shallow copy: the per-frame hashes are shared with results[:frames],
  # so updating them below also updates the returned result.
  frames = results[:frames].dup
  samples_count = 0

  # Loop through the instructions and set the frame hash with the data.
  # We use nonexistent.def for the file name, otherwise insns.def will be displayed
  # and that information isn't useful in this context.
  RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
    frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil, lines: {} }
    results[:frames][frame_id] = frame_hash
    frames[frame_id] = frame_hash
  end

  # Loop through the raw_samples and build the hashes for StackProf.
  # The loop is based off an example in the StackProf documentation and therefore
  # this functionality can only work with that library.
  i = 0
  while i < raw_samples.length
    # +1: the exiting instruction follows the frames and is treated as the top frame
    stack_length = raw_samples[i] + 1
    i += 1 # consume the stack length

    # The count sits right after the stack. Read it up front so that every
    # counter of this record is weighted by the number of occurrences.
    sample_count = raw_samples[i + stack_length]

    prev_frame_id = nil
    stack_length.times do |offset|
      idx = i + offset
      frame_id = raw_samples[idx]

      if prev_frame_id
        prev_frame = frames[prev_frame_id]
        prev_frame[:edges][frame_id] ||= 0
        prev_frame[:edges][frame_id] += sample_count
      end

      frame_info = frames[frame_id]
      frame_info[:total_samples] += sample_count

      frame_info[:lines][line_samples[idx]] ||= [0, 0]
      frame_info[:lines][line_samples[idx]][0] += sample_count

      prev_frame_id = frame_id
    end

    i += stack_length # consume the stack

    top_frame_id = prev_frame_id
    top_frame_line = 1

    frames[top_frame_id][:samples] += sample_count
    frames[top_frame_id][:lines] ||= {}
    frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
    frames[top_frame_id][:lines][top_frame_line][1] += sample_count

    samples_count += sample_count
    i += 1 # consume the count
  end

  results[:samples] = samples_count
  # Set missed_samples and gc_samples to 0 as their values
  # don't matter to us in this context.
  results[:missed_samples] = 0
  results[:gc_samples] = 0
  results
end
end
end

27
rjit.c
Просмотреть файл

@ -67,7 +67,10 @@ struct rjit_options rb_rjit_opts;
// true if RJIT is enabled.
bool rb_rjit_enabled = false;
// true if --rjit-stats (used before rb_rjit_opts is set)
bool rb_rjit_stats_enabled = false;
// true if --rjit-trace-exits (used before rb_rjit_opts is set)
bool rb_rjit_trace_exits_enabled = false;
// true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
// and `rb_rjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
bool rb_rjit_call_p = false;
@ -93,6 +96,11 @@ static VALUE rb_cRJITCfpPtr = 0;
// RubyVM::RJIT::Hooks
static VALUE rb_mRJITHooks = 0;
// Frames for --rjit-trace-exits
VALUE rb_rjit_raw_samples = 0;
// Line numbers for --rjit-trace-exits
VALUE rb_rjit_line_samples = 0;
// A default threshold used to add iseq to JIT.
#define DEFAULT_CALL_THRESHOLD 30
// Size of executable memory block in MiB.
@ -113,6 +121,9 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt)
else if (opt_match_noarg(s, l, "stats")) {
rjit_opt->stats = true;
}
else if (opt_match_noarg(s, l, "trace-exits")) {
rjit_opt->trace_exits = true;
}
else if (opt_match_arg(s, l, "call-threshold")) {
rjit_opt->call_threshold = atoi(s + 1);
}
@ -136,6 +147,7 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt)
const struct ruby_opt_message rb_rjit_option_messages[] = {
#if RJIT_STATS
M("--rjit-stats", "", "Enable collecting RJIT statistics"),
M("--rjit-trace-exits", "", "Trace side exit locations"),
#endif
M("--rjit-exec-mem-size=num", "", "Size of executable memory block in MiB (default: " STRINGIZE(DEFAULT_EXEC_MEM_SIZE) ")"),
M("--rjit-call-threshold=num", "", "Number of calls to trigger JIT (default: " STRINGIZE(DEFAULT_CALL_THRESHOLD) ")"),
@ -314,6 +326,8 @@ rb_rjit_mark(void)
rb_gc_mark(rb_cRJITIseqPtr);
rb_gc_mark(rb_cRJITCfpPtr);
rb_gc_mark(rb_mRJITHooks);
rb_gc_mark(rb_rjit_raw_samples);
rb_gc_mark(rb_rjit_line_samples);
RUBY_MARK_LEAVE("rjit");
}
@ -398,6 +412,10 @@ rb_rjit_init(const struct rjit_options *opts)
rb_cRJITIseqPtr = rb_funcall(rb_mRJITC, rb_intern("rb_iseq_t"), 0);
rb_cRJITCfpPtr = rb_funcall(rb_mRJITC, rb_intern("rb_control_frame_t"), 0);
rb_mRJITHooks = rb_const_get(rb_mRJIT, rb_intern("Hooks"));
if (rb_rjit_opts.trace_exits) {
rb_rjit_raw_samples = rb_ary_new();
rb_rjit_line_samples = rb_ary_new();
}
// Enable RJIT and stats from here
rb_rjit_call_p = !rb_rjit_opts.pause;
@ -408,13 +426,20 @@ rb_rjit_init(const struct rjit_options *opts)
// Primitive for rjit.rb
//
// Same as `RubyVM::RJIT::C.enabled?`, but this is used before rjit_init.
// Same as `rb_rjit_opts.stats`, but this is used before rb_rjit_opts is set.
// Returns the rb_rjit_stats_enabled flag as a Ruby boolean.
static VALUE
rjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self)
{
return RBOOL(rb_rjit_stats_enabled);
}
// Same as `rb_rjit_opts.trace_exits`, but this is used before rb_rjit_opts is set.
// Returns the rb_rjit_trace_exits_enabled flag as a Ruby boolean.
static VALUE
rjit_trace_exits_enabled_p(rb_execution_context_t *ec, VALUE self)
{
return RBOOL(rb_rjit_trace_exits_enabled);
}
// Disable anything that could impact stats. It ends up disabling JIT calls as well.
static VALUE
rjit_stop_stats(rb_execution_context_t *ec, VALUE self)

4
rjit.h
Просмотреть файл

@ -32,6 +32,8 @@ struct rjit_options {
unsigned int exec_mem_size;
// Collect RJIT statistics
bool stats;
// Trace side exit locations
bool trace_exits;
// Enable disasm of all JIT code
bool dump_disasm;
// [experimental] Do not start RJIT until RJIT.resume is called.
@ -69,6 +71,7 @@ extern void rb_rjit_collect_vm_usage_insn(int insn);
extern bool rb_rjit_enabled;
extern bool rb_rjit_stats_enabled;
extern bool rb_rjit_trace_exits_enabled;
# else // USE_RJIT
@ -88,6 +91,7 @@ static inline void rb_rjit_tracing_invalidate_all(rb_event_flag_t new_iseq_event
#define rb_rjit_enabled false
#define rb_rjit_call_p false
#define rb_rjit_stats_enabled false
#define rb_rjit_trace_exits_enabled false
#define rb_rjit_call_threshold() UINT_MAX

Просмотреть файл

@ -18,6 +18,12 @@ module RubyVM::RJIT
print_stats
end
end
# --rjit-trace-exits: write the recorded exit locations to a dump file when
# the process exits.
if Primitive.rjit_trace_exits_enabled_p
at_exit do
# Stop collection before dumping; rjit_record_exit_stack is written to
# bail out once Primitive.rjit_stop_stats has run.
Primitive.rjit_stop_stats
dump_trace_exits
end
end
end
if RubyVM::RJIT.enabled?

193
rjit_c.c
Просмотреть файл

@ -11,6 +11,8 @@
#if USE_RJIT
#include "rjit_c.h"
#include "include/ruby/assert.h"
#include "include/ruby/debug.h"
#include "internal.h"
#include "internal/compile.h"
#include "internal/fixnum.h"
@ -206,6 +208,197 @@ rjit_get_proc_ptr(VALUE procv)
return proc;
}
// Use the same buffer size as Stackprof.
#define BUFF_LEN 2048
extern VALUE rb_rjit_raw_samples;
extern VALUE rb_rjit_line_samples;
// Record the current Ruby stack for an exit at `exit_pc` into
// rb_rjit_raw_samples (frames) and rb_rjit_line_samples (line numbers).
//
// Each record appended to the two arrays has the layout
//   [ stack_length, frame_N, ..., frame_1, insn, count ]
// with the line array holding a parallel entry at every position. When the
// most recent record has the same stack, only its trailing count is bumped.
static void
rjit_record_exit_stack(const VALUE *exit_pc)
{
// Let Primitive.rjit_stop_stats stop this
if (!rb_rjit_call_p) return;
// Get the opcode from the encoded insn handler at this PC
int insn = rb_vm_insn_addr2opcode((void *)*exit_pc);
// Create 2 array buffers to be used to collect frames and lines.
VALUE frames_buffer[BUFF_LEN] = { 0 };
int lines_buffer[BUFF_LEN] = { 0 };
// Records call frame and line information for each method entry into two
// temporary buffers. Returns the number of times we added to the buffer (ie
// the length of the stack).
//
// Call frame info is stored in the frames_buffer, line number information
// in the lines_buffer. The first argument is the start point and the second
// argument is the buffer limit, set at 2048.
int stack_length = rb_profile_frames(0, BUFF_LEN, frames_buffer, lines_buffer);
int samples_length = stack_length + 3; // 3: length, insn, count
// If rb_rjit_raw_samples holds at least samples_length entries, its tail may
// be a record of this very stack, i.e. we might have seen this stack trace
// previously. The FIXNUM_P check rejects a tail whose would-be length slot
// is not a Fixnum.
int prev_stack_len_index = RARRAY_LEN(rb_rjit_raw_samples) - samples_length;
VALUE prev_stack_len_obj;
if (RARRAY_LEN(rb_rjit_raw_samples) >= samples_length && FIXNUM_P(prev_stack_len_obj = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index))) {
int prev_stack_len = NUM2INT(prev_stack_len_obj);
int idx = stack_length - 1;
int prev_frame_idx = 0;
bool seen_already = true;
// If the previous stack length and current stack length are equal,
// loop and compare the current frame to the previous frame. If they are
// not equal, set seen_already to false and break out of the loop.
// NOTE(review): only the frames are compared here; the exit insn and the
// line numbers of the previous record are not. Confirm that merging such
// exits into the previous record is intended.
if (prev_stack_len == stack_length) {
while (idx >= 0) {
VALUE current_frame = frames_buffer[idx];
VALUE prev_frame = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index + prev_frame_idx + 1);
// If the current frame and previous frame are not equal, set
// seen_already to false and break out of the loop.
if (current_frame != prev_frame) {
seen_already = false;
break;
}
idx--;
prev_frame_idx++;
}
// If we know we've seen this stack before, increment the counter by 1.
// The count is the last element of both arrays.
if (seen_already) {
int prev_idx = RARRAY_LEN(rb_rjit_raw_samples) - 1;
int prev_count = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, prev_idx));
int new_count = prev_count + 1;
rb_ary_store(rb_rjit_raw_samples, prev_idx, INT2NUM(new_count));
rb_ary_store(rb_rjit_line_samples, prev_idx, INT2NUM(new_count));
return;
}
}
}
// New record: start with the stack length in both arrays.
rb_ary_push(rb_rjit_raw_samples, INT2NUM(stack_length));
rb_ary_push(rb_rjit_line_samples, INT2NUM(stack_length));
// Append the frames and their lines, walking the buffers from the last
// entry down to index 0.
int idx = stack_length - 1;
while (idx >= 0) {
VALUE frame = frames_buffer[idx];
int line = lines_buffer[idx];
rb_ary_push(rb_rjit_raw_samples, frame);
rb_ary_push(rb_rjit_line_samples, INT2NUM(line));
idx--;
}
// Push the insn value into the rb_rjit_raw_samples array.
rb_ary_push(rb_rjit_raw_samples, INT2NUM(insn));
// Push the matching entry onto the rb_rjit_line_samples array. The value is
// derived from the array's current length rather than from a source line.
int line = RARRAY_LEN(rb_rjit_line_samples) - 1;
rb_ary_push(rb_rjit_line_samples, INT2NUM(line));
// Push number of times seen onto the stack, which is 1
// because it's the first time we've seen it.
rb_ary_push(rb_rjit_raw_samples, INT2NUM(1));
rb_ary_push(rb_rjit_line_samples, INT2NUM(1));
}
// For a given raw_sample (frame), store a frame_info hash (name, file, line
// and zeroed counters) into `hash`, keyed by the frame converted to an Integer.
// Does nothing when `hash` already has a truthy entry for that key.
static void
rjit_add_frame(VALUE hash, VALUE frame)
{
    VALUE frame_id = SIZET2NUM(frame);

    // Already registered: keep the existing entry untouched
    if (RTEST(rb_hash_aref(hash, frame_id))) return;

    VALUE frame_info = rb_hash_new();

    // Full label for the frame
    VALUE name = rb_profile_frame_full_label(frame);
    // Absolute path of the frame
    VALUE file = rb_profile_frame_absolute_path(frame);
    // First line number of the frame
    VALUE line = rb_profile_frame_first_lineno(frame);

    // Fall back to the plain path when no absolute path is available
    if (NIL_P(file)) file = rb_profile_frame_path(frame);

    rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name);
    rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file);
    rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0));
    rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
    rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new());
    rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new());

    // The :line key is only set for a non-zero line number
    if (line != INT2FIX(0)) {
        rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line);
    }

    rb_hash_aset(hash, frame_id, frame_info);
}
// Build the Hash handed to Ruby for --rjit-trace-exits:
//   :raw    => copy of rb_rjit_raw_samples with frame VALUEs converted to Integers
//   :lines  => copy of rb_rjit_line_samples
//   :frames => frame id => frame_info Hash (filled by rjit_add_frame)
static VALUE
rjit_exit_traces(void)
{
int samples_len = RARRAY_LEN(rb_rjit_raw_samples);
RUBY_ASSERT(samples_len == RARRAY_LEN(rb_rjit_line_samples));
VALUE result = rb_hash_new();
VALUE raw_samples = rb_ary_new_capa(samples_len);
VALUE line_samples = rb_ary_new_capa(samples_len);
VALUE frames = rb_hash_new();
int idx = 0;
// While the index is less than samples_len, parse one record of
// rb_rjit_raw_samples and rb_rjit_line_samples per iteration, then add
// casted values to the raw_samples and line_samples arrays.
while (idx < samples_len) {
// First slot of a record: the number of frames that follow
int num = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, idx));
int line_num = NUM2INT(RARRAY_AREF(rb_rjit_line_samples, idx));
idx++;
rb_ary_push(raw_samples, SIZET2NUM(num));
rb_ary_push(line_samples, INT2NUM(line_num));
// Loop over the `num` frames of this record and add data to the
// frames hash. Also push the current value onto the raw_samples
// and line_samples array respectively. The frame VALUE is converted
// with SIZET2NUM, the same conversion rjit_add_frame uses for its key.
for (int o = 0; o < num; o++) {
rjit_add_frame(frames, RARRAY_AREF(rb_rjit_raw_samples, idx));
rb_ary_push(raw_samples, SIZET2NUM(RARRAY_AREF(rb_rjit_raw_samples, idx)));
rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
idx++;
}
// insn BIN and lineno
rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx));
rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
idx++;
// Number of times seen
rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx));
rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
idx++;
}
// Add the raw_samples, line_samples, and frames to the result hash.
rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples);
rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples);
rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);
return result;
}
// An offsetof implementation that works for unnamed struct and union.
// Multiplying 8 for compatibility with libclang's offsetof.
#define OFFSETOF(ptr, member) RB_SIZE2NUM(((char *)&ptr.member - (char*)&ptr) * 8)

Просмотреть файл

@ -294,6 +294,10 @@ module RubyVM::RJIT # :nodoc: all
}
end
# Returns the value of the C function rjit_exit_traces().
def rjit_exit_traces
Primitive.cexpr! 'rjit_exit_traces()'
end
#
# Utilities: Not used by RJIT, but useful for debugging
#
@ -585,6 +589,10 @@ module RubyVM::RJIT # :nodoc: all
Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_optimized_call) }
end
# Address of the C function rjit_record_exit_stack, converted to a Ruby Integer.
def C.rjit_record_exit_stack
Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_record_exit_stack) }
end
def C.rjit_str_neq_internal
Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_str_neq_internal) }
end
@ -1239,6 +1247,7 @@ module RubyVM::RJIT # :nodoc: all
call_threshold: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), call_threshold)")],
exec_mem_size: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), exec_mem_size)")],
stats: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), stats)")],
trace_exits: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), trace_exits)")],
dump_disasm: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), dump_disasm)")],
pause: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), pause)")],
)

2
ruby.c
Просмотреть файл

@ -1617,6 +1617,8 @@ ruby_opt_init(ruby_cmdline_options_t *opt)
rb_rjit_enabled = true;
if (opt->rjit.stats)
rb_rjit_stats_enabled = true;
if (opt->rjit.trace_exits)
rb_rjit_trace_exits_enabled = true;
#endif
Init_ext(); /* load statically linked extensions before rubygems */

Просмотреть файл

@ -512,6 +512,7 @@ generator = BindingGenerator.new(
rjit_full_cfunc_return
rjit_optimized_call
rjit_str_neq_internal
rjit_record_exit_stack
],
types: %w[
CALL_DATA