YJIT: Allow inlining ISEQ calls with a block (#9622)

* YJIT: Allow inlining ISEQ calls with a block

* Leave a TODO comment about u16 inline_block
This commit is contained in:
Takashi Kokubun 2024-01-23 11:36:23 -08:00 коммит произвёл GitHub
Родитель 557b69e83b
Коммит 27c1dd8634
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
11 изменённых файлов: 115 добавлений и 12 удалений

Просмотреть файл

@ -0,0 +1,7 @@
# Benchmark definition: the prelude evals a method `loop_times_megamorphic` whose body is
# 1000 repetitions of `1.times {|i|};`, so every call site passes its own block literal.
# The `benchmark` entry then measures a call to that method.
prelude: |
eval(<<~EOS)
def loop_times_megamorphic
#{"1.times {|i|};" * 1000}
end
EOS
benchmark: loop_times_megamorphic

Просмотреть файл

@ -8637,6 +8637,9 @@ compile_builtin_attr(rb_iseq_t *iseq, const NODE *node)
if (strcmp(RSTRING_PTR(string), "leaf") == 0) {
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF;
}
else if (strcmp(RSTRING_PTR(string), "inline_block") == 0) {
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_INLINE_BLOCK;
}
else {
goto unknown_arg;
}

Просмотреть файл

@ -87,6 +87,7 @@ module Kernel
#++
#
def tap
# :inline_block asks the JIT to compile a separate copy of this method for each
# block iseq passed to it, so that the `yield` below stays monomorphic.
Primitive.attr! :inline_block
yield(self)
self
end
@ -127,6 +128,7 @@ module Kernel
# then {|response| JSON.parse(response) }
#
def then
Primitive.attr! :inline_block
unless block_given?
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
end
@ -142,6 +144,7 @@ module Kernel
# "my string".yield_self {|s| s.upcase } #=> "MY STRING"
#
def yield_self
Primitive.attr! :inline_block
unless block_given?
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
end
@ -178,6 +181,7 @@ module Kernel
# puts enum.next
# } #=> :ok
def loop
Primitive.attr! :inline_block
unless block_given?
return enum_for(:loop) { Float::INFINITY }
end

Просмотреть файл

@ -229,6 +229,7 @@ class Integer
#
# With no block given, returns an Enumerator.
def times
Primitive.attr! :inline_block
unless block_given?
return to_enum(:times) { self < 0 ? 0 : self }
end

Просмотреть файл

@ -6,7 +6,7 @@ require_relative 'ruby_vm/helpers/c_escape'
SUBLIBS = {}
REQUIRED = {}
BUILTIN_ATTRS = %w[leaf]
BUILTIN_ATTRS = %w[leaf inline_block]
def string_literal(lit, str = [])
while lit

Просмотреть файл

@ -368,6 +368,8 @@ enum rb_builtin_attr {
BUILTIN_ATTR_LEAF = 0x01,
// This iseq contains only a single `opt_invokebuiltin_delegate_leave` instruction with 0 arguments.
BUILTIN_ATTR_SINGLE_NOARG_LEAF = 0x02,
// This attribute signals JIT to duplicate the iseq for each block iseq so that its `yield` will be monomorphic.
BUILTIN_ATTR_INLINE_BLOCK = 0x04,
};
typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *);

Просмотреть файл

@ -345,6 +345,7 @@ module RubyVM::YJIT
if stats[:compiled_blockid_count] != 0
out.puts "versions_per_block: " + format_number(13, "%4.3f" % (stats[:compiled_block_count].fdiv(stats[:compiled_blockid_count])))
end
out.puts "max_inline_versions: " + format_number(13, stats[:max_inline_versions])
out.puts "compiled_branch_count: " + format_number(13, stats[:compiled_branch_count])
out.puts "compile_time_ms: " + format_number(13, stats[:compile_time_ns] / (1000 * 1000))
out.puts "block_next_count: " + format_number(13, stats[:block_next_count])

Просмотреть файл

@ -1810,7 +1810,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
// Gets the EP of the ISeq of the containing method, or "local level".
// Equivalent of GET_LEP() macro.
fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd {
fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
// Equivalent of get_lvar_level() in compile.c
fn get_lvar_level(iseq: IseqPtr) -> u32 {
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
@ -6910,6 +6910,12 @@ fn gen_send_iseq(
// Create a context for the callee
let mut callee_ctx = Context::default();
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
callee_ctx.set_inline_block(iseq);
}
// Set the argument types in the callee's context
for arg_idx in 0..argc {
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
@ -7904,6 +7910,13 @@ fn gen_invokeblock_specialized(
Counter::guard_invokeblock_tag_changed,
);
// If the current ISEQ is annotated to be inlined but it's not being inlined here,
// generate a dynamic dispatch to avoid making this yield megamorphic.
if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined);
return None;
}
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };

Просмотреть файл

@ -480,6 +480,13 @@ pub struct Context {
// Stack slot type/local_idx we track
// 8 temp types * 4 bits, total 32 bits
temp_payload: u32,
/// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
/// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
/// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
/// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
/// could allow this to consume no bytes, so we're leaving this as is.
inline_block: u64,
}
/// Tuple of (iseq, idx) used to identify basic blocks
@ -1400,14 +1407,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
/// Count the number of block versions matching a given blockid
fn get_num_versions(blockid: BlockId) -> usize {
/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
Some(payload) => {
payload
.version_map
.get(insn_idx)
.map(|versions| versions.len())
.map(|versions| {
versions.iter().filter(|&&version|
unsafe { version.as_ref() }.ctx.inline() == inlined
).count()
})
.unwrap_or(0)
}
None => 0,
@ -1465,6 +1477,9 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;
/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
// Guard chains implement limits separately, do nothing
@ -1472,21 +1487,39 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
let max_versions = if ctx.inline() {
MAX_INLINE_VERSIONS
} else {
get_option!(max_versions)
};
// If this block version we're about to add will hit the version limit
if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
if next_versions >= max_versions {
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints.
// This new context will then match all future requests.
let generic_ctx = ctx.get_generic_ctx();
debug_assert_ne!(
TypeDiff::Incompatible,
ctx.diff(&generic_ctx),
"should substitute a compatible context",
);
if cfg!(debug_assertions) {
let mut ctx = ctx.clone();
if ctx.inline() {
// Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
// to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
ctx.inline_block = 0;
assert!(generic_ctx.inline_block == 0);
}
assert_ne!(
TypeDiff::Incompatible,
ctx.diff(&generic_ctx),
"should substitute a compatible context",
);
}
return generic_ctx;
}
incr_counter_to!(max_inline_versions, next_versions);
return *ctx;
}
@ -2020,6 +2053,16 @@ impl Context {
self.local_types = 0;
}
/// Report whether this context belongs to code inlined by its caller,
/// i.e. whether a block ISEQ has been recorded in `inline_block` (0 means none).
pub fn inline(&self) -> bool {
    // Copy the field out first; a zero value is the "not inlined" marker.
    let recorded_block = self.inline_block;
    recorded_block != 0
}
/// Record the block ISEQ supplied by the caller for the Block this Context describes.
/// The pointer is stored as a plain integer in `inline_block`.
pub fn set_inline_block(&mut self, iseq: IseqPtr) {
    let block_addr = iseq as u64;
    self.inline_block = block_addr;
}
/// Compute a difference score for two context objects
pub fn diff(&self, dst: &Context) -> TypeDiff {
// Self is the source context (at the end of the predecessor)
@ -2065,6 +2108,13 @@ impl Context {
TypeDiff::Incompatible => return TypeDiff::Incompatible,
};
// Check the block to inline
if src.inline_block != dst.inline_block {
// find_block_version should not find existing blocks with different
// inline_block so that their yield will not be megamorphic.
return TypeDiff::Incompatible;
}
// For each local type we track
for i in 0.. MAX_LOCAL_TYPES {
let t_src = src.get_local_type(i);
@ -3456,7 +3506,7 @@ mod tests {
#[test]
fn context_size() {
assert_eq!(mem::size_of::<Context>(), 15);
assert_eq!(mem::size_of::<Context>(), 23);
}
#[test]

Просмотреть файл

@ -449,6 +449,7 @@ pub struct iseq_inline_cvar_cache_entry {
}
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
pub type rb_builtin_attr = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]

Просмотреть файл

@ -245,7 +245,7 @@ macro_rules! make_counters {
/// The list of counters that are available without --yjit-stats.
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
pub const DEFAULT_COUNTERS: [Counter; 8] = [
pub const DEFAULT_COUNTERS: [Counter; 9] = [
Counter::code_gc_count,
Counter::compiled_iseq_entry,
Counter::cold_iseq_entry,
@ -254,6 +254,7 @@ pub const DEFAULT_COUNTERS: [Counter; 8] = [
Counter::compiled_block_count,
Counter::compiled_branch_count,
Counter::compile_time_ns,
Counter::max_inline_versions,
];
/// Macro to increase a counter by name and count
@ -269,6 +270,24 @@ macro_rules! incr_counter_by {
}
pub(crate) use incr_counter_by;
/// Macro to raise a counter to the given value when that value is larger than
/// the counter's current value; otherwise the counter keeps its value.
/// In effect the named counter tracks a running maximum.
macro_rules! incr_counter_to {
    // Accessing COUNTERS requires `unsafe`. The rationale recorded with this macro is
    // that initialization happens once before any Ruby code executes.
    // NOTE(review): that rationale was worded in terms of options, not counters — confirm.
    ($counter_name:ident, $count:expr) => {
        #[allow(unused_unsafe)]
        {
            unsafe {
                // Read the stored value first, then evaluate the candidate.
                let current: u64 = $crate::stats::COUNTERS.$counter_name;
                let candidate = $count as u64;
                // Always store, keeping whichever of the two values is larger.
                $crate::stats::COUNTERS.$counter_name =
                    if candidate > current { candidate } else { current };
            }
        }
    };
}
pub(crate) use incr_counter_to;
/// Macro to increment a counter by name
macro_rules! incr_counter {
// Unsafe is ok here because options are initialized
@ -395,6 +414,7 @@ make_counters! {
invokeblock_iseq_arg0_args_splat,
invokeblock_iseq_arg0_not_array,
invokeblock_iseq_arg0_wrong_len,
invokeblock_iseq_not_inlined,
invokeblock_ifunc_args_splat,
invokeblock_ifunc_kw_splat,
invokeblock_proc,
@ -518,6 +538,7 @@ make_counters! {
defer_empty_count,
branch_insn_count,
branch_known_count,
max_inline_versions,
freed_iseq_count,