YJIT: A64: Use CBZ/CBNZ to check for zero

* YJIT: A64: Add CBZ and CBNZ encoding functions

* YJIT: A64: Use CBZ/CBNZ to check for zero

Instead of emitting `cmp x0, #0` followed by `b.eq #target`, A64 offers
Compare and Branch on Zero, which does the same check and branch in a single
`cbz x0, #target`. This commit uses that instruction and the related CBNZ
where appropriate.

We check for zero most commonly in interrupt checks:

```diff
  # Insn: 0003 leave (stack_size: 1)
  # RUBY_VM_CHECK_INTS(ec)
  ldur w11, [x20, #0x20]
  -tst w11, w11
  -b.ne #0x109002164
  +cbnz w11, #0x1049021d0
```

* fix copy paste error

Co-authored-by: Randy Stauner <randy@r4s6.net>

---------

Co-authored-by: Randy Stauner <randy@r4s6.net>
Alan Wu 2024-04-17 17:48:38 -04:00, committed by GitHub
Parent 48846d6b8d
Commit 8b81301536
No key found matching this signature
GPG key ID: B5690EEEBB952194
4 changed files: 155 additions and 1 deletion

View file

@@ -215,6 +215,9 @@ pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
imm_fits_bits(offset, 19)
}
/// CBZ and CBNZ also have a limit of 19 bits for the branch offset.
pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits;
/// B.cond - branch to target if condition is true
pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) {
assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less.");
@@ -1096,6 +1099,48 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
cb.write_bytes(&bytes);
}
/// CBZ - branch if a register is zero
pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
cbz_cbnz(rt.num_bits, false, offset, rt.reg_no)
} else {
panic!("Invalid operand combination to cbz instruction.")
};
cb.write_bytes(&bytes);
}
/// CBNZ - branch if a register is non-zero
pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
cbz_cbnz(rt.num_bits, true, offset, rt.reg_no)
} else {
panic!("Invalid operand combination to cbnz instruction.")
};
cb.write_bytes(&bytes);
}
/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ)
/// with `op=1`.
///
/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero->
///
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
/// | sf  0  1  1    0  1  0  op                                                                                     |
/// |                             imm19........................................................... Rt.............. |
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] {
((Sf::from(num_bits) as u32) << 31 |
0b11010 << 25 |
u32::from(op) << 24 |
truncate_imm::<_, 19>(offset) << 5 |
rt as u32).to_le_bytes()
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1270,6 +1315,24 @@ mod tests {
check_bytes("80021fd6", |cb| br(cb, X20));
}
#[test]
fn test_cbz() {
let offset = InstructionOffset::from_insns(-1);
check_bytes("e0ffffb4e0ffff34", |cb| {
cbz(cb, X0, offset);
cbz(cb, W0, offset);
});
}
#[test]
fn test_cbnz() {
let offset = InstructionOffset::from_insns(2);
check_bytes("540000b554000035", |cb| {
cbnz(cb, X20, offset);
cbnz(cb, W20, offset);
});
}
#[test]
fn test_brk_none() {
check_bytes("000020d4", |cb| brk(cb, A64Opnd::None));

View file

@@ -459,7 +459,34 @@ impl Assembler
}
asm.push_insn(insn);
},
}
// Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch.
ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } |
ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if {
let same_opnd_if_test = if let Insn::Test { .. } = insn {
left == right
} else {
true
};
same_opnd_if_test && if let Some(
Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target)
) = iterator.peek() {
matches!(target, Target::SideExit { .. })
} else {
false
}
} => {
let reg = split_load_operand(asm, *left);
match iterator.peek() {
Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)),
Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)),
_ => ()
}
iterator.map_insn_index(asm);
iterator.next_unmapped(); // Pop merged jump instruction
}
Insn::CCall { opnds, fptr, .. } => {
assert!(opnds.len() <= C_ARG_OPNDS.len());
@@ -812,6 +839,45 @@ impl Assembler
};
}
/// Emit a CBZ or CBNZ which branches when a register is zero or non-zero
fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) {
if let Target::SideExitPtr(dst_ptr) = target {
let dst_addr = dst_ptr.as_offset();
let src_addr = cb.get_write_ptr().as_offset();
if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) {
// If the offset fits in one instruction, generate cbz or cbnz
let bytes = (dst_addr - src_addr) as i32;
if branch_if_zero {
cbz(cb, reg, InstructionOffset::from_bytes(bytes));
} else {
cbnz(cb, reg, InstructionOffset::from_bytes(bytes));
}
} else {
// Otherwise, we load the address into a register and
// use the branch register instruction. Note that because
// side exits should always be close, this form should be
// rare or impossible to see.
let dst_addr = dst_ptr.raw_addr(cb) as u64;
let load_insns: i32 = emit_load_size(dst_addr).into();
// Write out the inverse condition so that if
// it doesn't match it will skip over the
// instructions used for branching.
if branch_if_zero {
cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
} else {
cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
}
emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
br(cb, Assembler::SCRATCH0);
}
} else {
unreachable!("We should only generate Joz/Jonz with side-exit targets");
}
}
/// Emit a push instruction for the given operand by adding to the stack
/// pointer and then storing the given value.
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
@@ -1172,6 +1238,12 @@ impl Assembler
Insn::Jo(target) => {
emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?);
},
Insn::Joz(opnd, target) => {
emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?);
},
Insn::Jonz(opnd, target) => {
emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?);
},
Insn::IncrCounter { mem, value } => {
let label = cb.new_label("incr_counter_loop".to_string());
cb.write_label(label);
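
The `cmp_branch_offset_fits_bits` guard above is what decides between the two paths: CBZ/CBNZ carry a signed 19-bit instruction offset, so they reach only about ±1 MiB from the branch itself, and anything further falls back to the inverse-condition branch over the load-address-plus-`br` sequence. A standalone sketch of that range check (it mirrors the 19-bit `imm_fits_bits` test rather than calling YJIT's version):

```rust
/// Signed 19-bit range check, analogous to imm_fits_bits(offset, 19).
fn fits_in_19_bits(offset: i64) -> bool {
    offset >= -(1 << 18) && offset < (1 << 18)
}

fn main() {
    // Offsets are counted in instructions, so divide the byte distance by 4.
    let near = (0x3000i64 - 0x1000) / 4;    // 8 KiB ahead: 2048 instructions
    let far = (0x20_0000i64 - 0x1000) / 4;  // ~2 MiB ahead: out of range

    assert!(fits_in_19_bits(near));   // a single cbz/cbnz is enough
    assert!(!fits_in_19_bits(far));   // needs the load-address + br fallback

    // Maximum reach in bytes: 2^18 instructions forward or back, i.e. ±1 MiB.
    println!("reach: ±{} bytes", (1i64 << 18) * 4);
}
```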

View file

@@ -452,6 +452,12 @@ pub enum Insn {
/// Jump if zero
Jz(Target),
/// Jump if operand is zero (only used during lowering at the moment)
Joz(Opnd, Target),
/// Jump if operand is non-zero (only used during lowering at the moment)
Jonz(Opnd, Target),
// Add a label into the IR at the point that this instruction is added.
Label(Target),
@@ -547,6 +553,9 @@ impl Insn {
Insn::Jo(target) |
Insn::Jz(target) |
Insn::Label(target) |
Insn::JoMul(target) |
Insn::Joz(_, target) |
Insn::Jonz(_, target) |
Insn::LeaJumpTarget { target, .. } => {
Some(target)
}
@@ -595,6 +604,8 @@ impl Insn {
Insn::Jo(_) => "Jo",
Insn::JoMul(_) => "JoMul",
Insn::Jz(_) => "Jz",
Insn::Joz(..) => "Joz",
Insn::Jonz(..) => "Jonz",
Insn::Label(_) => "Label",
Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
Insn::Lea { .. } => "Lea",
@@ -755,6 +766,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
Insn::CRet(opnd) |
@@ -763,6 +775,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::LiveReg { opnd, .. } |
Insn::Load { opnd, .. } |
Insn::LoadSExt { opnd, .. } |
Insn::Joz(opnd, _) |
Insn::Jonz(opnd, _) |
Insn::Not { opnd, .. } => {
match self.idx {
0 => {
@@ -857,6 +871,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
Insn::CRet(opnd) |
@@ -865,6 +880,8 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::LiveReg { opnd, .. } |
Insn::Load { opnd, .. } |
Insn::LoadSExt { opnd, .. } |
Insn::Joz(opnd, _) |
Insn::Jonz(opnd, _) |
Insn::Not { opnd, .. } => {
match self.idx {
0 => {

View file

@@ -796,6 +796,8 @@ impl Assembler
}
}
Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
// Atomically increment a counter at a given memory location
Insn::IncrCounter { mem, value } => {
assert!(matches!(mem, Opnd::Mem(_)));