Change IncrCounter lowering on AArch64 (#6455)

* Change IncrCounter lowering on AArch64

Previously we were using LDADDAL which is not available on
Graviton 1 chips. Instead, we're going to use an exclusive
load/store group through the LDAXR/STLXR instructions.

* Update yjit/src/backend/arm64/mod.rs

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
This commit is contained in:
Kevin Newton 2022-09-27 16:58:01 -04:00 коммит произвёл GitHub
Родитель 8f7f12ad64
Коммит 28433e9aa0
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 181 добавлений и 11 удалений

Просмотреть файл

@ -0,0 +1,109 @@
/// Whether the exclusive instruction performs a load or a store.
enum Op {
    Store = 0,
    Load = 1
}

/// The width of the data register being transferred.
enum Size {
    Size32 = 0b10,
    Size64 = 0b11
}

impl From<u8> for Size {
    /// Map a register width (in bits) onto the encoded size field, so that
    /// an operand's number of bits converts directly into a Size variant.
    fn from(num_bits: u8) -> Self {
        match num_bits {
            32 => Size::Size32,
            64 => Size::Size64,
            _ => panic!("Invalid number of bits: {}", num_bits)
        }
    }
}

/// The struct that represents an A64 load or store exclusive instruction
/// (LDAXR/STLXR) that can be encoded.
///
/// Bit layout, most significant bits first:
/// size(2) | 0 0 1 0 0 0 | 0 | op(1) | 0 | rs(5) | 1 | 1 1 1 1 1 | rn(5) | rt(5)
pub struct LoadStoreExclusive {
    /// The number of the register to be loaded.
    rt: u8,

    /// The base register with which to form the address.
    rn: u8,

    /// The register to be used for the status result if it applies to this
    /// operation. Otherwise it's the zero register.
    rs: u8,

    /// The operation being performed for this instruction.
    op: Op,

    /// The size of the registers being operated on.
    size: Size
}

impl LoadStoreExclusive {
    /// LDAXR
    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
    pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
        // Loads have no status result, so rs is the zero register (31).
        Self { rt, rn, rs: 31, op: Op::Load, size: Size::from(num_bits) }
    }

    /// STLXR
    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
    pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
        Self { rt, rn, rs, op: Op::Store, size: Size::from(num_bits) }
    }
}

/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
const FAMILY: u32 = 0b0100;

impl From<LoadStoreExclusive> for u32 {
    /// Convert an instruction into a 32-bit value.
    fn from(inst: LoadStoreExclusive) -> Self {
        let mut result = (inst.size as u32) << 30;
        result |= FAMILY << 25;
        result |= (inst.op as u32) << 22;
        result |= (inst.rs as u32) << 16;
        result |= 0b111111 << 10; // o0 = 1, rt2 = 0b11111 (unused)
        result |= (inst.rn as u32) << 5;
        result | (inst.rt as u32)
    }
}

impl From<LoadStoreExclusive> for [u8; 4] {
    /// Convert an instruction into a 4 byte array (little-endian).
    fn from(inst: LoadStoreExclusive) -> [u8; 4] {
        u32::from(inst).to_le_bytes()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ldaxr() {
        // LDAXR X16, [X0]
        let encoded: u32 = LoadStoreExclusive::ldaxr(16, 0, 64).into();
        assert_eq!(0xc85ffc10, encoded);
    }

    #[test]
    fn test_stlxr() {
        // STLXR W17, X16, [X0] — 64-bit data register, 32-bit status register.
        let encoded: u32 = LoadStoreExclusive::stlxr(17, 16, 0, 64).into();
        assert_eq!(0xc811fc10, encoded);
    }
}

Просмотреть файл

@ -13,6 +13,7 @@ mod halfword_imm;
mod load_literal;
mod load_register;
mod load_store;
mod load_store_exclusive;
mod logical_imm;
mod logical_reg;
mod mov;
@ -36,6 +37,7 @@ pub use halfword_imm::HalfwordImm;
pub use load_literal::LoadLiteral;
pub use load_register::LoadRegister;
pub use load_store::LoadStore;
pub use load_store_exclusive::LoadStoreExclusive;
pub use logical_imm::LogicalImm;
pub use logical_reg::LogicalReg;
pub use mov::Mov;

Просмотреть файл

@ -331,6 +331,20 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
cb.write_bytes(&bytes);
}
/// LDAXR - atomic load with acquire semantics
///
/// Loads the value at the address in `rn` into `rt`, marking the address
/// for exclusive access.
pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = if let (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) = (rt, rn) {
        // Addresses are always formed from a full-width base register.
        assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");

        LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into()
    } else {
        panic!("Invalid operand combination to ldaxr instruction.")
    };

    cb.write_bytes(&bytes);
}
/// LDP (signed offset) - load a pair of registers from memory
pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt1, rt2, rn) {
@ -707,6 +721,21 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
cb.write_bytes(&bytes);
}
/// STLXR - store a value to memory, release exclusive access
///
/// Stores `rt` through the address in `rn`, writing the exclusive-monitor
/// status (0 = success, 1 = failure) into the 32-bit register `rs`.
pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = match (rs, rt, rn) {
        (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
            assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register.");
            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");

            // The size field of the encoding reflects the width of the data
            // register being stored (rt), not of the base address register
            // (rn, which is always 64 bits). Taking it from rn would
            // mis-encode 32-bit stores as 64-bit STLXR.
            LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rt.num_bits).into()
        },
        _ => panic!("Invalid operand combination to stlxr instruction.")
    };

    cb.write_bytes(&bytes);
}
/// STP (signed offset) - store a pair of registers to memory
pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt1, rt2, rn) {
@ -1183,6 +1212,11 @@ mod tests {
check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
}
#[test]
fn test_ldaxr() {
// Encodes LDAXR X10, [X11]; bytes are the little-endian form of 0xc85ffd6a.
check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11));
}
#[test]
fn test_ldp() {
check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
@ -1333,6 +1367,11 @@ mod tests {
check_bytes("80025fd6", |cb| ret(cb, X20));
}
#[test]
fn test_stlxr() {
// Encodes STLXR W10, X11, [X12]; bytes are the little-endian form of 0xc80afd8b.
check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12));
}
#[test]
fn test_stp() {
check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));

Просмотреть файл

@ -84,6 +84,14 @@ impl A64Opnd {
_ => false
}
}
/// Unwrap a register from an operand.
///
/// Panics if the operand is not an `A64Opnd::Reg`.
pub fn unwrap_reg(&self) -> A64Reg {
    if let A64Opnd::Reg(reg) = self {
        *reg
    } else {
        panic!("Expected register operand")
    }
}
}
// argument registers
@ -102,6 +110,8 @@ pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 };
pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };
pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 };
pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 };
// callee-save registers
pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };

Просмотреть файл

@ -70,7 +70,8 @@ impl Assembler
{
// A special scratch register for intermediate processing.
// This register is caller-saved (so we don't have to save it before using it)
const SCRATCH0: A64Opnd = A64Opnd::Reg(X15_REG);
const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG);
const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
/// Get the list of registers from which we will allocate on this platform
/// These are caller-saved registers
@ -373,17 +374,12 @@ impl Assembler
asm.csel_ge(opnd0, opnd1);
},
Insn::IncrCounter { mem, value } => {
// We'll use LDADD later which only works with registers
// ... Load pointer into register
let counter_addr = split_lea_operand(asm, mem);
// Load immediates into a register
let addend = match value {
opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd),
opnd => opnd,
let counter_addr = match mem {
Opnd::Mem(_) => split_lea_operand(asm, mem),
_ => mem
};
asm.incr_counter(counter_addr, addend);
asm.incr_counter(counter_addr, value);
},
Insn::JmpOpnd(opnd) => {
if let Opnd::Mem(_) = opnd {
@ -936,7 +932,21 @@ impl Assembler
emit_conditional_jump::<{Condition::VS}>(cb, *target);
},
Insn::IncrCounter { mem, value } => {
ldaddal(cb, value.into(), value.into(), mem.into());
let label = cb.new_label("incr_counter_loop".to_string());
cb.write_label(label);
ldaxr(cb, Self::SCRATCH0, mem.into());
add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
// The status register that gets used to track whether or
// not the store was successful must be 32 bits. Since we
// store the SCRATCH registers as their 64-bit versions, we
// need to rewrap it here.
let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
stlxr(cb, status, Self::SCRATCH0, mem.into());
cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
},
Insn::Breakpoint => {
brk(cb, A64Opnd::None);