зеркало из https://github.com/github/ruby.git
YJIT: Allow inlining ISEQ calls with a block (#9622)
* YJIT: Allow inlining ISEQ calls with a block * Leave a TODO comment about u16 inline_block
This commit is contained in:
Родитель
557b69e83b
Коммит
27c1dd8634
|
@ -0,0 +1,7 @@
|
|||
prelude: |
|
||||
eval(<<~EOS)
|
||||
def loop_times_megamorphic
|
||||
#{"1.times {|i|};" * 1000}
|
||||
end
|
||||
EOS
|
||||
benchmark: loop_times_megamorphic
|
|
@ -8637,6 +8637,9 @@ compile_builtin_attr(rb_iseq_t *iseq, const NODE *node)
|
|||
if (strcmp(RSTRING_PTR(string), "leaf") == 0) {
|
||||
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF;
|
||||
}
|
||||
else if (strcmp(RSTRING_PTR(string), "inline_block") == 0) {
|
||||
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_INLINE_BLOCK;
|
||||
}
|
||||
else {
|
||||
goto unknown_arg;
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@ module Kernel
|
|||
#++
|
||||
#
|
||||
# Yields self to the given block, then returns self (the block's result is discarded).
def tap
|
||||
# Tags this method with the :inline_block builtin attribute.
# NOTE(review): presumably lets the JIT specialize this method per block ISEQ — confirm against the attribute's definition.
Primitive.attr! :inline_block
|
||||
yield(self)
|
||||
self
|
||||
end
|
||||
|
@ -127,6 +128,7 @@ module Kernel
|
|||
# then {|response| JSON.parse(response) }
|
||||
#
|
||||
def then
|
||||
Primitive.attr! :inline_block
|
||||
unless block_given?
|
||||
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
||||
end
|
||||
|
@ -142,6 +144,7 @@ module Kernel
|
|||
# "my string".yield_self {|s| s.upcase } #=> "MY STRING"
|
||||
#
|
||||
def yield_self
|
||||
Primitive.attr! :inline_block
|
||||
unless block_given?
|
||||
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
||||
end
|
||||
|
@ -178,6 +181,7 @@ module Kernel
|
|||
# puts enum.next
|
||||
# } #=> :ok
|
||||
def loop
|
||||
Primitive.attr! :inline_block
|
||||
unless block_given?
|
||||
return enum_for(:loop) { Float::INFINITY }
|
||||
end
|
||||
|
|
|
@ -229,6 +229,7 @@ class Integer
|
|||
#
|
||||
# With no block given, returns an Enumerator.
|
||||
def times
|
||||
Primitive.attr! :inline_block
|
||||
unless block_given?
|
||||
return to_enum(:times) { self < 0 ? 0 : self }
|
||||
end
|
||||
|
|
|
@ -6,7 +6,7 @@ require_relative 'ruby_vm/helpers/c_escape'
|
|||
|
||||
SUBLIBS = {}
|
||||
REQUIRED = {}
|
||||
BUILTIN_ATTRS = %w[leaf]
|
||||
BUILTIN_ATTRS = %w[leaf inline_block]
|
||||
|
||||
def string_literal(lit, str = [])
|
||||
while lit
|
||||
|
|
|
@ -368,6 +368,8 @@ enum rb_builtin_attr {
|
|||
BUILTIN_ATTR_LEAF = 0x01,
|
||||
// This iseq contains only a single `opt_invokebuiltin_delegate_leave` instruction with 0 arguments.
|
||||
BUILTIN_ATTR_SINGLE_NOARG_LEAF = 0x02,
|
||||
// This attribute signals JIT to duplicate the iseq for each block iseq so that its `yield` will be monomorphic.
|
||||
BUILTIN_ATTR_INLINE_BLOCK = 0x04,
|
||||
};
|
||||
|
||||
typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *);
|
||||
|
|
1
yjit.rb
1
yjit.rb
|
@ -345,6 +345,7 @@ module RubyVM::YJIT
|
|||
if stats[:compiled_blockid_count] != 0
|
||||
out.puts "versions_per_block: " + format_number(13, "%4.3f" % (stats[:compiled_block_count].fdiv(stats[:compiled_blockid_count])))
|
||||
end
|
||||
out.puts "max_inline_versions: " + format_number(13, stats[:max_inline_versions])
|
||||
out.puts "compiled_branch_count: " + format_number(13, stats[:compiled_branch_count])
|
||||
out.puts "compile_time_ms: " + format_number(13, stats[:compile_time_ns] / (1000 * 1000))
|
||||
out.puts "block_next_count: " + format_number(13, stats[:block_next_count])
|
||||
|
|
|
@ -1810,7 +1810,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
|
|||
|
||||
// Gets the EP of the ISeq of the containing method, or "local level".
|
||||
// Equivalent of GET_LEP() macro.
|
||||
fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd {
|
||||
fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
|
||||
// Equivalent of get_lvar_level() in compile.c
|
||||
fn get_lvar_level(iseq: IseqPtr) -> u32 {
|
||||
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
|
||||
|
@ -6910,6 +6910,12 @@ fn gen_send_iseq(
|
|||
// Create a context for the callee
|
||||
let mut callee_ctx = Context::default();
|
||||
|
||||
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
|
||||
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
|
||||
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
|
||||
callee_ctx.set_inline_block(iseq);
|
||||
}
|
||||
|
||||
// Set the argument types in the callee's context
|
||||
for arg_idx in 0..argc {
|
||||
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
|
||||
|
@ -7904,6 +7910,13 @@ fn gen_invokeblock_specialized(
|
|||
Counter::guard_invokeblock_tag_changed,
|
||||
);
|
||||
|
||||
// If the current ISEQ is annotated to be inlined but it's not being inlined here,
|
||||
// generate a dynamic dispatch to avoid making this yield megamorphic.
|
||||
if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
|
||||
gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined);
|
||||
return None;
|
||||
}
|
||||
|
||||
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
|
||||
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
|
||||
|
||||
|
|
|
@ -480,6 +480,13 @@ pub struct Context {
|
|||
// Stack slot type/local_idx we track
|
||||
// 8 temp types * 4 bits, total 32 bits
|
||||
temp_payload: u32,
|
||||
|
||||
/// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
|
||||
/// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
|
||||
/// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
|
||||
/// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
|
||||
/// could allow this to consume no bytes, so we're leaving this as is.
|
||||
inline_block: u64,
|
||||
}
|
||||
|
||||
/// Tuple of (iseq, idx) used to identify basic blocks
|
||||
|
@ -1400,14 +1407,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
|
|||
}
|
||||
|
||||
/// Count the number of block versions matching a given blockid
|
||||
fn get_num_versions(blockid: BlockId) -> usize {
|
||||
/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
|
||||
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
|
||||
let insn_idx = blockid.idx.as_usize();
|
||||
match get_iseq_payload(blockid.iseq) {
|
||||
Some(payload) => {
|
||||
payload
|
||||
.version_map
|
||||
.get(insn_idx)
|
||||
.map(|versions| versions.len())
|
||||
.map(|versions| {
|
||||
versions.iter().filter(|&&version|
|
||||
unsafe { version.as_ref() }.ctx.inline() == inlined
|
||||
).count()
|
||||
})
|
||||
.unwrap_or(0)
|
||||
}
|
||||
None => 0,
|
||||
|
@ -1465,6 +1477,9 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
|
|||
return best_version;
|
||||
}
|
||||
|
||||
/// Version limit for inlined contexts: allow inlining a Block up to MAX_INLINE_VERSIONS times.
/// `limit_block_versions` uses this value instead of the `max_versions` option
/// when the context it is given reports `inline()`.
|
||||
const MAX_INLINE_VERSIONS: usize = 1000;
|
||||
|
||||
/// Produce a generic context when the block version limit is hit for a blockid
|
||||
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
|
||||
// Guard chains implement limits separately, do nothing
|
||||
|
@ -1472,21 +1487,39 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
|
|||
return *ctx;
|
||||
}
|
||||
|
||||
let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
|
||||
let max_versions = if ctx.inline() {
|
||||
MAX_INLINE_VERSIONS
|
||||
} else {
|
||||
get_option!(max_versions)
|
||||
};
|
||||
|
||||
// If this block version we're about to add will hit the version limit
|
||||
if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
|
||||
if next_versions >= max_versions {
|
||||
// Produce a generic context that stores no type information,
|
||||
// but still respects the stack_size and sp_offset constraints.
|
||||
// This new context will then match all future requests.
|
||||
let generic_ctx = ctx.get_generic_ctx();
|
||||
|
||||
debug_assert_ne!(
|
||||
TypeDiff::Incompatible,
|
||||
ctx.diff(&generic_ctx),
|
||||
"should substitute a compatible context",
|
||||
);
|
||||
if cfg!(debug_assertions) {
|
||||
let mut ctx = ctx.clone();
|
||||
if ctx.inline() {
|
||||
// Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
|
||||
// to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
|
||||
ctx.inline_block = 0;
|
||||
assert!(generic_ctx.inline_block == 0);
|
||||
}
|
||||
|
||||
assert_ne!(
|
||||
TypeDiff::Incompatible,
|
||||
ctx.diff(&generic_ctx),
|
||||
"should substitute a compatible context",
|
||||
);
|
||||
}
|
||||
|
||||
return generic_ctx;
|
||||
}
|
||||
incr_counter_to!(max_inline_versions, next_versions);
|
||||
|
||||
return *ctx;
|
||||
}
|
||||
|
@ -2020,6 +2053,16 @@ impl Context {
|
|||
self.local_types = 0;
|
||||
}
|
||||
|
||||
/// Return true if the code is inlined by the caller, i.e. this context
/// carries a block ISEQ in `inline_block` (any non-zero value).
/// A value of 0 means "not inlined".
|
||||
pub fn inline(&self) -> bool {
|
||||
self.inline_block != 0
|
||||
}
|
||||
|
||||
/// Record `iseq` as the block ISEQ given to the Block of this Context.
/// The pointer is stored as its `u64` integer value in `inline_block`;
/// any non-zero value makes `inline()` return true afterwards.
|
||||
pub fn set_inline_block(&mut self, iseq: IseqPtr) {
|
||||
self.inline_block = iseq as u64
|
||||
}
|
||||
|
||||
/// Compute a difference score for two context objects
|
||||
pub fn diff(&self, dst: &Context) -> TypeDiff {
|
||||
// Self is the source context (at the end of the predecessor)
|
||||
|
@ -2065,6 +2108,13 @@ impl Context {
|
|||
TypeDiff::Incompatible => return TypeDiff::Incompatible,
|
||||
};
|
||||
|
||||
// Check the block to inline
|
||||
if src.inline_block != dst.inline_block {
|
||||
// find_block_version should not find existing blocks with different
|
||||
// inline_block so that their yield will not be megamorphic.
|
||||
return TypeDiff::Incompatible;
|
||||
}
|
||||
|
||||
// For each local type we track
|
||||
for i in 0.. MAX_LOCAL_TYPES {
|
||||
let t_src = src.get_local_type(i);
|
||||
|
@ -3456,7 +3506,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn context_size() {
|
||||
assert_eq!(mem::size_of::<Context>(), 15);
|
||||
assert_eq!(mem::size_of::<Context>(), 23);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -449,6 +449,7 @@ pub struct iseq_inline_cvar_cache_entry {
|
|||
}
|
||||
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
|
||||
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
|
||||
pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
|
||||
pub type rb_builtin_attr = u32;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
|
|
|
@ -245,7 +245,7 @@ macro_rules! make_counters {
|
|||
|
||||
/// The list of counters that are available without --yjit-stats.
|
||||
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
|
||||
pub const DEFAULT_COUNTERS: [Counter; 8] = [
|
||||
pub const DEFAULT_COUNTERS: [Counter; 9] = [
|
||||
Counter::code_gc_count,
|
||||
Counter::compiled_iseq_entry,
|
||||
Counter::cold_iseq_entry,
|
||||
|
@ -254,6 +254,7 @@ pub const DEFAULT_COUNTERS: [Counter; 8] = [
|
|||
Counter::compiled_block_count,
|
||||
Counter::compiled_branch_count,
|
||||
Counter::compile_time_ns,
|
||||
Counter::max_inline_versions,
|
||||
];
|
||||
|
||||
/// Macro to increase a counter by name and count
|
||||
|
@ -269,6 +270,24 @@ macro_rules! incr_counter_by {
|
|||
}
|
||||
pub(crate) use incr_counter_by;
|
||||
|
||||
/// Macro to raise a counter to the given value if that value is larger
/// than the counter's current value; otherwise the counter is left unchanged.
/// `$counter_name` names a field of `$crate::stats::COUNTERS` and `$count`
/// is cast to `u64` before the comparison.
|
||||
macro_rules! incr_counter_to {
|
||||
// NOTE(review): the justification below mentions options, but this macro
// reads and writes the `COUNTERS` static — confirm the intended safety argument.
// Unsafe is ok here because options are initialized
|
||||
// once before any Ruby code executes
|
||||
($counter_name:ident, $count:expr) => {
|
||||
#[allow(unused_unsafe)]
|
||||
{
|
||||
unsafe {
|
||||
$crate::stats::COUNTERS.$counter_name = u64::max(
|
||||
$crate::stats::COUNTERS.$counter_name,
|
||||
$count as u64,
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
pub(crate) use incr_counter_to;
|
||||
|
||||
/// Macro to increment a counter by name
|
||||
macro_rules! incr_counter {
|
||||
// Unsafe is ok here because options are initialized
|
||||
|
@ -395,6 +414,7 @@ make_counters! {
|
|||
invokeblock_iseq_arg0_args_splat,
|
||||
invokeblock_iseq_arg0_not_array,
|
||||
invokeblock_iseq_arg0_wrong_len,
|
||||
invokeblock_iseq_not_inlined,
|
||||
invokeblock_ifunc_args_splat,
|
||||
invokeblock_ifunc_kw_splat,
|
||||
invokeblock_proc,
|
||||
|
@ -518,6 +538,7 @@ make_counters! {
|
|||
defer_empty_count,
|
||||
branch_insn_count,
|
||||
branch_known_count,
|
||||
max_inline_versions,
|
||||
|
||||
freed_iseq_count,
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче