зеркало из https://github.com/github/ruby.git
YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64 (#7671)
* YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64
* YJIT: Define jmp_ptr_bytes on CodeBlock
This commit is contained in:
Родитель
8c360ce713
Коммит
7297374c5e
|
@ -2,10 +2,6 @@ use std::cell::RefCell;
|
|||
use std::fmt;
|
||||
use std::mem;
|
||||
use std::rc::Rc;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use crate::backend::x86_64::JMP_PTR_BYTES;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use crate::backend::arm64::JMP_PTR_BYTES;
|
||||
use crate::core::IseqPayload;
|
||||
use crate::core::for_each_off_stack_iseq_payload;
|
||||
use crate::core::for_each_on_stack_iseq_payload;
|
||||
|
@ -123,7 +119,7 @@ impl CodeBlock {
|
|||
page_size,
|
||||
write_pos: 0,
|
||||
past_page_bytes: 0,
|
||||
page_end_reserve: JMP_PTR_BYTES,
|
||||
page_end_reserve: 0,
|
||||
label_addrs: Vec::new(),
|
||||
label_names: Vec::new(),
|
||||
label_refs: Vec::new(),
|
||||
|
@ -133,6 +129,7 @@ impl CodeBlock {
|
|||
dropped_bytes: false,
|
||||
freed_pages,
|
||||
};
|
||||
cb.page_end_reserve = cb.jmp_ptr_bytes();
|
||||
cb.write_pos = cb.page_start();
|
||||
cb
|
||||
}
|
||||
|
@ -196,7 +193,7 @@ impl CodeBlock {
|
|||
self.write_pos = dst_pos;
|
||||
let dst_ptr = self.get_write_ptr();
|
||||
self.write_pos = src_pos;
|
||||
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
|
||||
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(cb.jmp_ptr_bytes())));
|
||||
|
||||
// Generate jmp_ptr from src_pos to dst_pos
|
||||
self.without_page_end_reserve(|cb| {
|
||||
|
@ -242,6 +239,11 @@ impl CodeBlock {
|
|||
self.mem_block.borrow().mapped_region_size()
|
||||
}
|
||||
|
||||
/// Size of the region in bytes where writes could be attempted.
|
||||
pub fn virtual_region_size(&self) -> usize {
|
||||
self.mem_block.borrow().virtual_region_size()
|
||||
}
|
||||
|
||||
/// Return the number of code pages that have been mapped by the VirtualMemory.
|
||||
pub fn num_mapped_pages(&self) -> usize {
|
||||
// CodeBlock's page size != VirtualMem's page size on Linux,
|
||||
|
@ -287,7 +289,7 @@ impl CodeBlock {
|
|||
if cfg!(debug_assertions) && !cfg!(test) {
|
||||
// Leave illegal instructions at the beginning of each page to assert
|
||||
// we're not accidentally crossing page boundaries.
|
||||
start += JMP_PTR_BYTES;
|
||||
start += self.jmp_ptr_bytes();
|
||||
}
|
||||
start
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#![allow(unused_variables)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use crate::asm::x86_64::jmp_ptr;
|
||||
use crate::asm::{CodeBlock};
|
||||
use crate::asm::arm64::*;
|
||||
use crate::codegen::{JITState, CodegenGlobals};
|
||||
|
@ -38,8 +39,25 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
|
|||
pub const C_SP_REG: A64Opnd = X31;
|
||||
pub const C_SP_STEP: i32 = 16;
|
||||
|
||||
// The number of bytes that are generated by emit_jmp_ptr
|
||||
pub const JMP_PTR_BYTES: usize = 20;
|
||||
impl CodeBlock {
|
||||
// The maximum number of bytes that can be generated by emit_jmp_ptr.
|
||||
pub fn jmp_ptr_bytes(&self) -> usize {
|
||||
// b instruction's offset is encoded as imm26 times 4. It can jump to
|
||||
// +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
|
||||
let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
|
||||
1 // b instruction
|
||||
} else {
|
||||
5 // 4 instructions to load a 64-bit absolute address + br instruction
|
||||
};
|
||||
num_insns * 4
|
||||
}
|
||||
|
||||
// The maximum number of instructions that can be generated by emit_conditional_jump.
|
||||
fn conditional_jump_insns(&self) -> i32 {
|
||||
// The worst case is instructions for a jump + bcond.
|
||||
self.jmp_ptr_bytes() as i32 / 4 + 1
|
||||
}
|
||||
}
|
||||
|
||||
/// Map Opnd to A64Opnd
|
||||
impl From<Opnd> for A64Opnd {
|
||||
|
@ -110,7 +128,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
|
|||
// Make sure it's always a consistent number of
|
||||
// instructions in case it gets patched and has to
|
||||
// use the other branch.
|
||||
for _ in num_insns..(JMP_PTR_BYTES / 4) {
|
||||
assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
|
||||
for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
|
||||
nop(cb);
|
||||
}
|
||||
}
|
||||
|
@ -697,6 +716,18 @@ impl Assembler
|
|||
// Here we're going to return 1 because we've only
|
||||
// written out 1 instruction.
|
||||
1
|
||||
} else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond
|
||||
// If the jump offset fits into the unconditional jump as
|
||||
// an immediate value, we can use inverse b.cond + b.
|
||||
//
|
||||
// We're going to write out the inverse condition so
|
||||
// that if it doesn't match it will skip over the
|
||||
// instruction used for branching.
|
||||
bcond(cb, Condition::inverse(CONDITION), 2.into());
|
||||
b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond
|
||||
|
||||
// We've only written out 2 instructions.
|
||||
2
|
||||
} else {
|
||||
// Otherwise, we need to load the address into a
|
||||
// register and use the branch register instruction.
|
||||
|
@ -720,7 +751,8 @@ impl Assembler
|
|||
// We need to make sure every kind of jump takes up the same
|
||||
// number of instructions (cb.conditional_jump_insns()) for invalidation
|
||||
// purposes, so we're going to write out padding nop instructions here.
|
||||
for _ in num_insns..6 { nop(cb); }
|
||||
assert!(num_insns <= cb.conditional_jump_insns());
|
||||
for _ in num_insns..cb.conditional_jump_insns() { nop(cb); }
|
||||
}
|
||||
},
|
||||
Target::Label(label_idx) => {
|
||||
|
@ -1063,7 +1095,7 @@ impl Assembler
|
|||
Insn::RegTemps(_) |
|
||||
Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
|
||||
Insn::PadInvalPatch => {
|
||||
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES && !cb.has_dropped_bytes() {
|
||||
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() {
|
||||
nop(cb);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,8 +34,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
|
|||
pub const C_RET_REG: Reg = RAX_REG;
|
||||
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
|
||||
|
||||
// The number of bytes that are generated by jmp_ptr
|
||||
pub const JMP_PTR_BYTES: usize = 6;
|
||||
impl CodeBlock {
|
||||
// The number of bytes that are generated by jmp_ptr
|
||||
pub fn jmp_ptr_bytes(&self) -> usize { 6 }
|
||||
}
|
||||
|
||||
/// Map Opnd to X86Opnd
|
||||
impl From<Opnd> for X86Opnd {
|
||||
|
@ -718,8 +720,8 @@ impl Assembler
|
|||
Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
|
||||
Insn::PadInvalPatch => {
|
||||
let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
|
||||
if code_size < JMP_PTR_BYTES {
|
||||
nop(cb, (JMP_PTR_BYTES - code_size) as u32);
|
||||
if code_size < cb.jmp_ptr_bytes() {
|
||||
nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче