зеркало из https://github.com/github/ruby.git
YJIT: A64: Use CBZ/CBNZ to check for zero
* YJIT: A64: Add CBZ and CBNZ encoding functions

* YJIT: A64: Use CBZ/CBNZ to check for zero

Instead of emitting `cmp x0, #0` plus `b.z #target`, A64 offers Compare and Branch on Zero for us to just do `cbz x0, #target`. This commit utilizes that and the related CBNZ instruction when appropriate.

We check for zero most commonly in interrupt checks:

```diff
 # Insn: 0003 leave (stack_size: 1)
 # RUBY_VM_CHECK_INTS(ec)
 ldur w11, [x20, #0x20]
-tst w11, w11
-b.ne #0x109002164
+cbnz w11, #0x1049021d0
```

* fix copy paste error

Co-authored-by: Randy Stauner <randy@r4s6.net>

---------

Co-authored-by: Randy Stauner <randy@r4s6.net>
This commit is contained in:
Родитель
48846d6b8d
Коммит
8b81301536
|
@ -215,6 +215,9 @@ pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
|
|||
imm_fits_bits(offset, 19)
|
||||
}
|
||||
|
||||
/// CBZ and CBNZ also have a limit of 19 bits for the branch offset.
|
||||
pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits;
|
||||
|
||||
/// B.cond - branch to target if condition is true
|
||||
pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) {
|
||||
assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less.");
|
||||
|
@ -1096,6 +1099,48 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
|
|||
cb.write_bytes(&bytes);
|
||||
}
|
||||
|
||||
/// CBZ - branch if a register is zero
|
||||
pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
|
||||
assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
|
||||
let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
|
||||
cbz_cbnz(rt.num_bits, false, offset, rt.reg_no)
|
||||
} else {
|
||||
panic!("Invalid operand combination to cbz instruction.")
|
||||
};
|
||||
|
||||
cb.write_bytes(&bytes);
|
||||
}
|
||||
|
||||
/// CBNZ - branch if a register is non-zero
|
||||
pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
|
||||
assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
|
||||
let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
|
||||
cbz_cbnz(rt.num_bits, true, offset, rt.reg_no)
|
||||
} else {
|
||||
panic!("Invalid operand combination to cbnz instruction.")
|
||||
};
|
||||
|
||||
cb.write_bytes(&bytes);
|
||||
}
|
||||
|
||||
/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ)
|
||||
/// with `op=1`.
|
||||
///
|
||||
/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero->
|
||||
///
|
||||
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
|
||||
/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
|
||||
/// | sf 0 1 1 0 1 0 op |
|
||||
/// | imm19........................................................... Rt.............. |
|
||||
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
|
||||
fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] {
|
||||
((Sf::from(num_bits) as u32) << 31 |
|
||||
0b11010 << 25 |
|
||||
u32::from(op) << 24 |
|
||||
truncate_imm::<_, 19>(offset) << 5 |
|
||||
rt as u32).to_le_bytes()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -1270,6 +1315,24 @@ mod tests {
|
|||
check_bytes("80021fd6", |cb| br(cb, X20));
|
||||
}
|
||||
|
||||
#[test]
fn test_cbz() {
    // Jump one instruction backwards; -1 truncated to 19 bits fills imm19 with ones.
    let back_one = InstructionOffset::from_insns(-1);
    // First word is the X0 form, second word is the W0 form.
    let expected = "e0ffffb4e0ffff34";

    check_bytes(expected, |cb| {
        cbz(cb, X0, back_one);
        cbz(cb, W0, back_one);
    });
}
|
||||
|
||||
#[test]
fn test_cbnz() {
    // Jump two instructions forwards.
    let forward_two = InstructionOffset::from_insns(2);
    // First word is the X20 form, second word is the W20 form.
    let expected = "540000b554000035";

    check_bytes(expected, |cb| {
        cbnz(cb, X20, forward_two);
        cbnz(cb, W20, forward_two);
    });
}
|
||||
|
||||
#[test]
|
||||
fn test_brk_none() {
|
||||
check_bytes("000020d4", |cb| brk(cb, A64Opnd::None));
|
||||
|
|
|
@ -459,7 +459,34 @@ impl Assembler
|
|||
}
|
||||
|
||||
asm.push_insn(insn);
|
||||
},
|
||||
}
|
||||
// Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch.
|
||||
ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } |
|
||||
ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if {
|
||||
let same_opnd_if_test = if let Insn::Test { .. } = insn {
|
||||
left == right
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
same_opnd_if_test && if let Some(
|
||||
Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target)
|
||||
) = iterator.peek() {
|
||||
matches!(target, Target::SideExit { .. })
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} => {
|
||||
let reg = split_load_operand(asm, *left);
|
||||
match iterator.peek() {
|
||||
Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)),
|
||||
Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)),
|
||||
_ => ()
|
||||
}
|
||||
|
||||
iterator.map_insn_index(asm);
|
||||
iterator.next_unmapped(); // Pop merged jump instruction
|
||||
}
|
||||
Insn::CCall { opnds, fptr, .. } => {
|
||||
assert!(opnds.len() <= C_ARG_OPNDS.len());
|
||||
|
||||
|
@ -812,6 +839,45 @@ impl Assembler
|
|||
};
|
||||
}
|
||||
|
||||
/// Emit a CBZ or CBNZ which branches when a register is zero or non-zero
|
||||
fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) {
|
||||
if let Target::SideExitPtr(dst_ptr) = target {
|
||||
let dst_addr = dst_ptr.as_offset();
|
||||
let src_addr = cb.get_write_ptr().as_offset();
|
||||
|
||||
if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) {
|
||||
// If the offset fits in one instruction, generate cbz or cbnz
|
||||
let bytes = (dst_addr - src_addr) as i32;
|
||||
if branch_if_zero {
|
||||
cbz(cb, reg, InstructionOffset::from_bytes(bytes));
|
||||
} else {
|
||||
cbnz(cb, reg, InstructionOffset::from_bytes(bytes));
|
||||
}
|
||||
} else {
|
||||
// Otherwise, we load the address into a register and
|
||||
// use the branch register instruction. Note that because
|
||||
// side exits should always be close, this form should be
|
||||
// rare or impossible to see.
|
||||
let dst_addr = dst_ptr.raw_addr(cb) as u64;
|
||||
let load_insns: i32 = emit_load_size(dst_addr).into();
|
||||
|
||||
// Write out the inverse condition so that if
|
||||
// it doesn't match it will skip over the
|
||||
// instructions used for branching.
|
||||
if branch_if_zero {
|
||||
cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
|
||||
} else {
|
||||
cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
|
||||
}
|
||||
emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
|
||||
br(cb, Assembler::SCRATCH0);
|
||||
|
||||
}
|
||||
} else {
|
||||
unreachable!("We should only generate Joz/Jonz with side-exit targets");
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a push instruction for the given operand by adding to the stack
|
||||
/// pointer and then storing the given value.
|
||||
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
|
||||
|
@ -1172,6 +1238,12 @@ impl Assembler
|
|||
Insn::Jo(target) => {
|
||||
emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?);
|
||||
},
|
||||
Insn::Joz(opnd, target) => {
|
||||
emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?);
|
||||
},
|
||||
Insn::Jonz(opnd, target) => {
|
||||
emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?);
|
||||
},
|
||||
Insn::IncrCounter { mem, value } => {
|
||||
let label = cb.new_label("incr_counter_loop".to_string());
|
||||
cb.write_label(label);
|
||||
|
|
|
@ -452,6 +452,12 @@ pub enum Insn {
|
|||
/// Jump if zero
|
||||
Jz(Target),
|
||||
|
||||
/// Jump if operand is zero (only used during lowering at the moment)
|
||||
Joz(Opnd, Target),
|
||||
|
||||
/// Jump if operand is non-zero (only used during lowering at the moment)
|
||||
Jonz(Opnd, Target),
|
||||
|
||||
// Add a label into the IR at the point that this instruction is added.
|
||||
Label(Target),
|
||||
|
||||
|
@ -547,6 +553,9 @@ impl Insn {
|
|||
Insn::Jo(target) |
|
||||
Insn::Jz(target) |
|
||||
Insn::Label(target) |
|
||||
Insn::JoMul(target) |
|
||||
Insn::Joz(_, target) |
|
||||
Insn::Jonz(_, target) |
|
||||
Insn::LeaJumpTarget { target, .. } => {
|
||||
Some(target)
|
||||
}
|
||||
|
@ -595,6 +604,8 @@ impl Insn {
|
|||
Insn::Jo(_) => "Jo",
|
||||
Insn::JoMul(_) => "JoMul",
|
||||
Insn::Jz(_) => "Jz",
|
||||
Insn::Joz(..) => "Joz",
|
||||
Insn::Jonz(..) => "Jonz",
|
||||
Insn::Label(_) => "Label",
|
||||
Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
|
||||
Insn::Lea { .. } => "Lea",
|
||||
|
@ -755,6 +766,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
|
|||
Insn::LeaJumpTarget { .. } |
|
||||
Insn::PadInvalPatch |
|
||||
Insn::PosMarker(_) => None,
|
||||
|
||||
Insn::CPopInto(opnd) |
|
||||
Insn::CPush(opnd) |
|
||||
Insn::CRet(opnd) |
|
||||
|
@ -763,6 +775,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
|
|||
Insn::LiveReg { opnd, .. } |
|
||||
Insn::Load { opnd, .. } |
|
||||
Insn::LoadSExt { opnd, .. } |
|
||||
Insn::Joz(opnd, _) |
|
||||
Insn::Jonz(opnd, _) |
|
||||
Insn::Not { opnd, .. } => {
|
||||
match self.idx {
|
||||
0 => {
|
||||
|
@ -857,6 +871,7 @@ impl<'a> InsnOpndMutIterator<'a> {
|
|||
Insn::LeaJumpTarget { .. } |
|
||||
Insn::PadInvalPatch |
|
||||
Insn::PosMarker(_) => None,
|
||||
|
||||
Insn::CPopInto(opnd) |
|
||||
Insn::CPush(opnd) |
|
||||
Insn::CRet(opnd) |
|
||||
|
@ -865,6 +880,8 @@ impl<'a> InsnOpndMutIterator<'a> {
|
|||
Insn::LiveReg { opnd, .. } |
|
||||
Insn::Load { opnd, .. } |
|
||||
Insn::LoadSExt { opnd, .. } |
|
||||
Insn::Joz(opnd, _) |
|
||||
Insn::Jonz(opnd, _) |
|
||||
Insn::Not { opnd, .. } => {
|
||||
match self.idx {
|
||||
0 => {
|
||||
|
|
|
@ -796,6 +796,8 @@ impl Assembler
|
|||
}
|
||||
}
|
||||
|
||||
Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
|
||||
|
||||
// Atomically increment a counter at a given memory location
|
||||
Insn::IncrCounter { mem, value } => {
|
||||
assert!(matches!(mem, Opnd::Mem(_)));
|
||||
|
|
Загрузка…
Ссылка в новой задаче