Mirror of https://github.com/github/ruby.git
YJIT: Break register cycles for C arguments (take 2) (#8018)

* Revert "Revert "YJIT: Break register cycles for C arguments (#7918)""

  This reverts commit 78ca085785.

* Use shifted_live_ranges for the last-insn check

Parent: f6af5a1128
Commit: a1d4dada6b
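
For context (an illustration inferred from the tests added below, not text from the commit): loading `ccall` arguments into the C argument registers strictly in argument order can clobber a register before a later move reads it. A minimal Rust sketch of the failure mode, modeling RDI and RSI as two slots:

    fn main() {
        // Hypothetical model of two argument registers, for illustration only:
        // regs[0] stands for RDI, regs[1] for RSI.
        let mut regs = ["a", "b"];
        // Naively emitting the swap `mov rdi, rsi; mov rsi, rdi`:
        regs[0] = regs[1]; // rdi = "b"
        regs[1] = regs[0]; // rsi = "b" -- the original RDI value is lost
        assert_eq!(regs, ["b", "b"]); // both arguments now hold "b"
    }

The new `Opnd::CArg` operand lets `alloc_regs` buffer such moves and emit them in a dependency-safe order, breaking cycles through a scratch register.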
bootstraptest/test_yjit.rb
@@ -3973,3 +3973,11 @@ assert_equal 'true', %q{
   true
 end
 } unless defined?(RubyVM::RJIT) && RubyVM::RJIT.enabled? # Not yet working on RJIT
+
+# Regression test: register allocator on expandarray
+assert_equal '[]', %q{
+  func = proc { [] }
+  proc do
+    _x, _y = func.call
+  end.call
+}

yjit/src/backend/arm64/mod.rs
@@ -74,6 +74,7 @@ impl From<Opnd> for A64Opnd {
             Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => {
                 panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base")
             },
+            Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"),
             Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"),
             Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
             Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"),
@@ -185,9 +186,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
 
 impl Assembler
 {
-    // A special scratch register for intermediate processing.
+    // Special scratch registers for intermediate processing.
     // This register is caller-saved (so we don't have to save it before using it)
-    const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG);
+    pub const SCRATCH_REG: Reg = X16_REG;
+    const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
     const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
 
     /// List of registers that can be used for stack temps.
@@ -280,7 +282,7 @@ impl Assembler
     /// do follow that encoding, and if they don't then we load them first.
     fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
         match opnd {
-            Opnd::Reg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
+            Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
             Opnd::Mem(_) => split_load_operand(asm, opnd),
             Opnd::Imm(imm) => {
                 if imm == 0 {
@@ -313,7 +315,7 @@ impl Assembler
     /// a certain size. If they don't then we need to load them first.
     fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
         match opnd {
-            Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
+            Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd,
             Opnd::Mem(_) => split_load_operand(asm, opnd),
             Opnd::Imm(_) => asm.load(opnd),
             Opnd::UImm(uimm) => {
@@ -452,7 +454,7 @@ impl Assembler
                         _ => *opnd
                     };
 
-                    asm.load_into(C_ARG_OPNDS[idx], value);
+                    asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value);
                 }
 
                 // Now we push the CCall without any arguments so that it
@@ -924,6 +926,9 @@ impl Assembler
                         let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
                         insn_gc_offsets.push(ptr_offset);
                     },
+                    Opnd::CArg { .. } => {
+                        unreachable!("C argument operand was not lowered before arm64_emit");
+                    }
                     Opnd::Stack { .. } => {
                         unreachable!("Stack operand was not lowered before arm64_emit");
                     }

yjit/src/backend/ir.rs
@@ -72,6 +72,9 @@ pub enum Opnd
     // Immediate Ruby value, may be GC'd, movable
     Value(VALUE),
 
+    /// C argument register. The alloc_regs pass resolves its register dependencies.
+    CArg(Reg),
+
     // Output of a preceding instruction in this block
     InsnOut{ idx: usize, num_bits: u8 },
@@ -102,6 +105,7 @@ impl fmt::Debug for Opnd {
         match self {
             Self::None => write!(fmt, "None"),
             Value(val) => write!(fmt, "Value({val:?})"),
+            CArg(reg) => write!(fmt, "CArg({reg:?})"),
             Stack { idx, sp_offset, .. } => write!(fmt, "SP[{}]", *sp_offset as i32 - idx - 1),
             InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"),
             Imm(signed) => write!(fmt, "{signed:x}_i64"),
@@ -145,6 +149,14 @@ impl Opnd
         Opnd::UImm(ptr as u64)
     }
 
+    /// Constructor for a C argument operand
+    pub fn c_arg(reg_opnd: Opnd) -> Self {
+        match reg_opnd {
+            Opnd::Reg(reg) => Opnd::CArg(reg),
+            _ => unreachable!(),
+        }
+    }
+
     pub fn is_some(&self) -> bool {
         match *self {
             Opnd::None => false,
@@ -1224,6 +1236,55 @@ impl Assembler
             }
         }
 
+        // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
+        // so that they will not rewrite each other before they are used.
+        fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
+            // Return the index of a move whose destination is not used as a source, if any.
+            fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
+                c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
+                    c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
+                }).map(|(index, _)| index)
+            }
+
+            // Remove moves whose source and destination are the same
+            let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
+                .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
+
+            let mut moves = vec![];
+            while c_args.len() > 0 {
+                // Keep taking safe moves
+                while let Some(index) = find_safe_arg(&c_args) {
+                    moves.push(c_args.remove(index));
+                }
+
+                // No safe move. Load the source of one move into SCRATCH_REG, and
+                // then load SCRATCH_REG into the destination when it's safe.
+                if c_args.len() > 0 {
+                    // Make sure it's safe to use SCRATCH_REG
+                    assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
+
+                    // Move SCRATCH <- opnd, and delay reg <- SCRATCH
+                    let (reg, opnd) = c_args.remove(0);
+                    moves.push((Assembler::SCRATCH_REG, opnd));
+                    c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
+                }
+            }
+            moves
+        }
+
+        // Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
+        fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
+            if shift_offset >= 0 {
+                for index in 0..(shift_offset as usize) {
+                    live_ranges.insert(start_index + index, start_index + index);
+                }
+            } else {
+                for _ in 0..-shift_offset {
+                    live_ranges.remove(start_index);
+                }
+            }
+        }
+
         // Dump live registers for register spill debugging.
         fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<usize>, num_regs: usize, spill_index: usize) {
             // Convert live_ranges to live_regs: the number of live registers at each index
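
The two helpers above implement a standard parallel-move resolution. A self-contained sketch of the same algorithm (a simplification for illustration, using a plain enum instead of YJIT's `Reg`/`Opnd` types) shows how a swap cycle is broken through the scratch register:

    #[derive(Clone, Copy, PartialEq, Debug)]
    enum R { Rdi, Rsi, Scratch }

    // Simplified mirror of reorder_c_args: emit safe moves first, and when
    // only cycles remain, route one source through the scratch register.
    fn reorder(mut pending: Vec<(R, R)>) -> Vec<(R, R)> {
        pending.retain(|&(dest, src)| dest != src); // drop no-op moves
        let mut moves = vec![];
        while !pending.is_empty() {
            // A move is safe if no remaining move still reads its destination.
            if let Some(i) = (0..pending.len())
                .find(|&i| pending.iter().all(|&(_, src)| src != pending[i].0)) {
                moves.push(pending.remove(i));
            } else {
                // Every destination is still read by some move: a cycle.
                let (dest, src) = pending.remove(0);
                moves.push((R::Scratch, src));    // save the source
                pending.push((dest, R::Scratch)); // finish the move once safe
            }
        }
        moves
    }

    fn main() {
        // Swapping RDI and RSI resolves to three moves, matching the
        // `mov r11, rsi; mov rsi, rdi; mov rdi, r11` sequence in the tests below.
        let moves = reorder(vec![(R::Rdi, R::Rsi), (R::Rsi, R::Rdi)]);
        assert_eq!(moves, vec![
            (R::Scratch, R::Rsi),
            (R::Rsi, R::Rdi),
            (R::Rdi, R::Scratch),
        ]);
    }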
@@ -1247,11 +1308,18 @@ impl Assembler
             }
         }
 
+        // We may need to reorder LoadInto instructions with a C argument operand.
+        // This buffers the operands of such instructions to process them in batches.
+        let mut c_args: Vec<(Reg, Opnd)> = vec![];
+
+        // live_ranges is indexed by original `index` given by the iterator.
         let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+        // shifted_live_ranges is indexed by mapped indexes in insn operands.
+        let mut shifted_live_ranges: Vec<usize> = live_ranges.clone();
         let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
         let mut iterator = self.into_draining_iter();
 
-        while let Some((index, mut insn)) = iterator.next_unmapped() {
+        while let Some((index, mut insn)) = iterator.next_mapped() {
             // Check if this is the last instruction that uses an operand that
             // spans more than one instruction. In that case, return the
             // allocated register to the pool.
@@ -1262,12 +1330,11 @@ impl Assembler
                 // Since we have an InsnOut, we know it spans more than one
                 // instruction.
                 let start_index = *idx;
-                assert!(start_index < index);
 
                 // We're going to check if this is the last instruction that
                 // uses this operand. If it is, we can return the allocated
                 // register to the pool.
-                if live_ranges[start_index] == index {
+                if shifted_live_ranges[start_index] == index {
                     if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() {
                         dealloc_reg(&mut pool, &regs, reg);
                     } else {
@@ -1314,7 +1381,7 @@ impl Assembler
             let mut opnd_iter = insn.opnd_iter();
 
             if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() {
-                if live_ranges[*idx] == index {
+                if shifted_live_ranges[*idx] == index {
                     if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() {
                         out_reg = Some(take_reg(&mut pool, &regs, reg));
                     }
@@ -1371,7 +1438,27 @@ impl Assembler
                 }
             }
 
-            asm.push_insn(insn);
+            // Push instruction(s). Batch and reorder C argument operations if needed.
+            if let Insn::LoadInto { dest: Opnd::CArg(reg), opnd } = insn {
+                // Buffer C arguments
+                c_args.push((reg, opnd));
+            } else {
+                // C arguments are buffered until CCall
+                if c_args.len() > 0 {
+                    // Resolve C argument dependencies
+                    let c_args_len = c_args.len() as isize;
+                    let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
+                    shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
+
+                    // Push batched C arguments
+                    for (reg, opnd) in moves {
+                        asm.load_into(Opnd::Reg(reg), opnd);
+                    }
+                }
+                // Other instructions are pushed as is
+                asm.push_insn(insn);
+            }
             iterator.map_insn_index(&mut asm);
         }
 
         assert_eq!(pool, 0, "Expected all registers to be returned to the pool");
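
Because the flush replaces n buffered `LoadInto` instructions with m real moves, the indexes of everything pushed afterwards shift by m - n, which is what the `shift_live_ranges` call above compensates for. A standalone check of that helper's two directions (the function is copied from the hunk above; the inputs are hypothetical):

    fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
        if shift_offset >= 0 {
            for index in 0..(shift_offset as usize) {
                live_ranges.insert(start_index + index, start_index + index);
            }
        } else {
            for _ in 0..-shift_offset {
                live_ranges.remove(start_index);
            }
        }
    }

    fn main() {
        // Growing by one (e.g. a cycle added a scratch move): the inserted
        // entry is live only at its own index.
        let mut lr = vec![0, 1, 2];
        shift_live_ranges(&mut lr, 1, 1);
        assert_eq!(lr, vec![0, 1, 1, 2]);

        // Shrinking by one (e.g. a no-op move was dropped) removes entries
        // at the start of the batch.
        let mut lr = vec![0, 1, 2, 3];
        shift_live_ranges(&mut lr, 2, -1);
        assert_eq!(lr, vec![0, 1, 3]);
    }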
@@ -1442,7 +1529,7 @@ impl AssemblerDrainingIterator {
     /// end of the current list of instructions in order to maintain that
     /// alignment.
     pub fn map_insn_index(&mut self, asm: &mut Assembler) {
-        self.indices.push(asm.insns.len() - 1);
+        self.indices.push(asm.insns.len().saturating_sub(1));
     }
 
     /// Map an operand by using this iterator's list of mapped indices.
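
A plausible reason for the `saturating_sub` (an inference, not stated in the commit): when the first instructions of a block are buffered C argument moves, nothing has been pushed to `asm.insns` yet when `map_insn_index` runs, so `len()` is 0 and the old `len() - 1` would underflow:

    fn main() {
        // 0usize - 1 panics in debug builds (and wraps in release);
        // saturating_sub clamps at zero instead.
        assert_eq!(0usize.saturating_sub(1), 0);
    }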

yjit/src/backend/x86_64/mod.rs
@@ -87,9 +87,9 @@ impl From<&Opnd> for X86Opnd {
 impl Assembler
 {
     // A special scratch register for intermediate processing.
-    // Note: right now this is only used by LeaLabel because label_ref accepts
-    // a closure and we don't want it to have to capture anything.
-    const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG);
+    // This register is caller-saved (so we don't have to save it before using it)
+    pub const SCRATCH_REG: Reg = R11_REG;
+    const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
 
     /// List of registers that can be used for stack temps.
     pub const TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
@@ -347,7 +347,7 @@ impl Assembler
                 // Load each operand into the corresponding argument
                 // register.
                 for (idx, opnd) in opnds.into_iter().enumerate() {
-                    asm.load_into(C_ARG_OPNDS[idx], *opnd);
+                    asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
                 }
 
                 // Now we push the CCall without any arguments so that it
@@ -1055,4 +1055,118 @@ mod tests {
 
         assert_eq!(format!("{:x}", cb), "4983f540");
     }
+
+    #[test]
+    fn test_reorder_c_args_no_cycle() {
+        let (mut asm, mut cb) = setup_asm();
+
+        asm.ccall(0 as _, vec![
+            C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
+            C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
+        ]);
+        asm.compile_with_num_regs(&mut cb, 0);
+
+        assert_disasm!(cb, "b800000000ffd0", {"
+            0x0: mov eax, 0
+            0x5: call rax
+        "});
+    }
+
+    #[test]
+    fn test_reorder_c_args_single_cycle() {
+        let (mut asm, mut cb) = setup_asm();
+
+        // rdi and rsi form a cycle
+        asm.ccall(0 as _, vec![
+            C_ARG_OPNDS[1], // mov rdi, rsi
+            C_ARG_OPNDS[0], // mov rsi, rdi
+            C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
+        ]);
+        asm.compile_with_num_regs(&mut cb, 0);
+
+        assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
+            0x0: mov r11, rsi
+            0x3: mov rsi, rdi
+            0x6: mov rdi, r11
+            0x9: mov eax, 0
+            0xe: call rax
+        "});
+    }
+
+    #[test]
+    fn test_reorder_c_args_two_cycles() {
+        let (mut asm, mut cb) = setup_asm();
+
+        // rdi and rsi form a cycle, and rdx and rcx form another cycle
+        asm.ccall(0 as _, vec![
+            C_ARG_OPNDS[1], // mov rdi, rsi
+            C_ARG_OPNDS[0], // mov rsi, rdi
+            C_ARG_OPNDS[3], // mov rdx, rcx
+            C_ARG_OPNDS[2], // mov rcx, rdx
+        ]);
+        asm.compile_with_num_regs(&mut cb, 0);
+
+        assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
+            0x0: mov r11, rsi
+            0x3: mov rsi, rdi
+            0x6: mov rdi, r11
+            0x9: mov r11, rcx
+            0xc: mov rcx, rdx
+            0xf: mov rdx, r11
+            0x12: mov eax, 0
+            0x17: call rax
+        "});
+    }
+
+    #[test]
+    fn test_reorder_c_args_large_cycle() {
+        let (mut asm, mut cb) = setup_asm();
+
+        // rdi, rsi, and rdx form a cycle
+        asm.ccall(0 as _, vec![
+            C_ARG_OPNDS[1], // mov rdi, rsi
+            C_ARG_OPNDS[2], // mov rsi, rdx
+            C_ARG_OPNDS[0], // mov rdx, rdi
+        ]);
+        asm.compile_with_num_regs(&mut cb, 0);
+
+        assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
+            0x0: mov r11, rsi
+            0x3: mov rsi, rdx
+            0x6: mov rdx, rdi
+            0x9: mov rdi, r11
+            0xc: mov eax, 0
+            0x11: call rax
+        "});
+    }
+
+    #[test]
+    fn test_reorder_c_args_with_insn_out() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let rax = asm.load(Opnd::UImm(1));
+        let rcx = asm.load(Opnd::UImm(2));
+        let rdx = asm.load(Opnd::UImm(3));
+        // rcx and rdx form a cycle
+        asm.ccall(0 as _, vec![
+            rax, // mov rdi, rax
+            rcx, // mov rsi, rcx
+            rcx, // mov rdx, rcx
+            rdx, // mov rcx, rdx
+        ]);
+        asm.compile_with_num_regs(&mut cb, 3);
+
+        assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
+            0x0: mov eax, 1
+            0x5: mov ecx, 2
+            0xa: mov edx, 3
+            0xf: mov rdi, rax
+            0x12: mov rsi, rcx
+            0x15: mov r11, rcx
+            0x18: mov rcx, rdx
+            0x1b: mov rdx, r11
+            0x1e: mov eax, 0
+            0x23: call rax
+        "});
+    }
 }