Mirror of https://github.com/github/ruby.git
Change IncrCounter lowering on AArch64 (#6455)
* Change IncrCounter lowering on AArch64

  Previously we were using LDADDAL, which is not available on Graviton 1
  chips. Instead, we now use an exclusive load/store sequence built from
  the LDAXR/STLXR instructions.

* Update yjit/src/backend/arm64/mod.rs

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>

Parent: 8f7f12ad64
Commit: 28433e9aa0
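
For context, the new lowering replaces the single LDADDAL instruction with a load-exclusive/store-exclusive retry loop (LDAXR, ADD, STLXR, compare, branch on failure). The standalone Rust sketch below is illustrative only and is not part of the commit; it uses std atomics to show the semantics such a retry loop computes. On ARMv8.0 targets without LSE atomics, this is also roughly how a fetch-and-add is lowered.

use std::sync::atomic::{AtomicU64, Ordering};

// Illustrative only: the LDAXR/ADD/STLXR sequence emitted by the backend
// behaves like this compare-and-swap retry loop.
fn incr_counter(counter: &AtomicU64, value: u64) {
    let mut current = counter.load(Ordering::Relaxed);
    loop {
        // compare_exchange_weak may fail spuriously, mirroring STLXR
        // reporting that the exclusive monitor was lost; retry in that case.
        match counter.compare_exchange_weak(
            current,
            current.wrapping_add(value),
            Ordering::AcqRel,
            Ordering::Relaxed,
        ) {
            Ok(_) => break,
            Err(observed) => current = observed,
        }
    }
}

fn main() {
    let counter = AtomicU64::new(0);
    incr_counter(&counter, 5);
    assert_eq!(counter.load(Ordering::Relaxed), 5);
}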

@@ -0,0 +1,109 @@
/// The operation being performed for this instruction.
enum Op {
    Store = 0,
    Load = 1
}

/// The size of the registers being operated on.
enum Size {
    Size32 = 0b10,
    Size64 = 0b11
}

/// A convenience function so that we can convert the number of bits of a
/// register operand directly into a Size enum variant.
impl From<u8> for Size {
    fn from(num_bits: u8) -> Self {
        match num_bits {
            64 => Size::Size64,
            32 => Size::Size32,
            _ => panic!("Invalid number of bits: {}", num_bits)
        }
    }
}

/// The struct that represents an A64 load or store exclusive instruction that
/// can be encoded.
///
/// LDAXR/STLXR
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
/// |        0  0    1  0  0  0    0     0                     1  1  1  1    1  1                                   |
/// | size.                          op    rs..............                       rn.............. rt.............. |
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
///
pub struct LoadStoreExclusive {
    /// The number of the register to be loaded.
    rt: u8,

    /// The base register with which to form the address.
    rn: u8,

    /// The register to be used for the status result if it applies to this
    /// operation. Otherwise it's the zero register.
    rs: u8,

    /// The operation being performed for this instruction.
    op: Op,

    /// The size of the registers being operated on.
    size: Size
}

impl LoadStoreExclusive {
    /// LDAXR
    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
    pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
        Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() }
    }

    /// STLXR
    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
    pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
        Self { rt, rn, rs, op: Op::Store, size: num_bits.into() }
    }
}

/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
const FAMILY: u32 = 0b0100;

impl From<LoadStoreExclusive> for u32 {
    /// Convert an instruction into a 32-bit value.
    fn from(inst: LoadStoreExclusive) -> Self {
        0
        | ((inst.size as u32) << 30)
        | (FAMILY << 25)
        | ((inst.op as u32) << 22)
        | ((inst.rs as u32) << 16)
        | (0b111111 << 10)
        | ((inst.rn as u32) << 5)
        | (inst.rt as u32)
    }
}

impl From<LoadStoreExclusive> for [u8; 4] {
    /// Convert an instruction into a 4 byte array.
    fn from(inst: LoadStoreExclusive) -> [u8; 4] {
        let result: u32 = inst.into();
        result.to_le_bytes()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ldaxr() {
        let inst = LoadStoreExclusive::ldaxr(16, 0, 64);
        let result: u32 = inst.into();
        assert_eq!(0xc85ffc10, result);
    }

    #[test]
    fn test_stlxr() {
        let inst = LoadStoreExclusive::stlxr(17, 16, 0, 64);
        let result: u32 = inst.into();
        assert_eq!(0xc811fc10, result);
    }
}
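
As a quick cross-check of the encoding above (an illustrative standalone snippet, not part of the commit): assembling the fields by hand, exactly as the From<LoadStoreExclusive> for u32 impl does, reproduces the machine word that the test_ldaxr case expects for ldaxr x16, [x0].

fn main() {
    let word: u32 = 0
        | (0b11 << 30)      // size: 64-bit operation
        | (0b0100 << 25)    // FAMILY: loads-and-stores encoding group
        | (1 << 22)         // op: load
        | (31 << 16)        // rs: loads have no status register, so the zero register
        | (0b111111 << 10)  // fixed bits
        | (0 << 5)          // rn: x0 as the base register
        | 16;               // rt: x16 as the destination
    assert_eq!(word, 0xc85ffc10);
}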

@@ -13,6 +13,7 @@ mod halfword_imm;
mod load_literal;
mod load_register;
mod load_store;
+mod load_store_exclusive;
mod logical_imm;
mod logical_reg;
mod mov;

@@ -36,6 +37,7 @@ pub use halfword_imm::HalfwordImm;
pub use load_literal::LoadLiteral;
pub use load_register::LoadRegister;
pub use load_store::LoadStore;
+pub use load_store_exclusive::LoadStoreExclusive;
pub use logical_imm::LogicalImm;
pub use logical_reg::LogicalReg;
pub use mov::Mov;

@@ -331,6 +331,20 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
    cb.write_bytes(&bytes);
}

+/// LDAXR - atomic load with acquire semantics
+pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rt, rn) {
+        (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to ldaxr instruction."),
+    };
+
+    cb.write_bytes(&bytes);
+}
+
/// LDP (signed offset) - load a pair of registers from memory
pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = match (rt1, rt2, rn) {

@@ -707,6 +721,21 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
    cb.write_bytes(&bytes);
}

+/// STLXR - store a value to memory, release exclusive access
+pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rs, rt, rn) {
+        (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register.");
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rn.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to stlxr instruction.")
+    };
+
+    cb.write_bytes(&bytes);
+}
+
/// STP (signed offset) - store a pair of registers to memory
pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = match (rt1, rt2, rn) {

@@ -1183,6 +1212,11 @@ mod tests {
        check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
    }

+    #[test]
+    fn test_ldaxr() {
+        check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11));
+    }
+
    #[test]
    fn test_ldp() {
        check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));

@@ -1333,6 +1367,11 @@ mod tests {
        check_bytes("80025fd6", |cb| ret(cb, X20));
    }

+    #[test]
+    fn test_stlxr() {
+        check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12));
+    }
+
    #[test]
    fn test_stp() {
        check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));

@@ -84,6 +84,14 @@ impl A64Opnd {
            _ => false
        }
    }
+
+    /// Unwrap a register from an operand.
+    pub fn unwrap_reg(&self) -> A64Reg {
+        match self {
+            A64Opnd::Reg(reg) => *reg,
+            _ => panic!("Expected register operand")
+        }
+    }
}

// argument registers

@@ -102,6 +110,8 @@ pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 };
pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };
+pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 };
+pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 };

// callee-save registers
pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };

@@ -70,7 +70,8 @@ impl Assembler
{
    // A special scratch register for intermediate processing.
    // This register is caller-saved (so we don't have to save it before using it)
-    const SCRATCH0: A64Opnd = A64Opnd::Reg(X15_REG);
+    const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG);
+    const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);

    /// Get the list of registers from which we will allocate on this platform
    /// These are caller-saved registers

@@ -373,17 +374,12 @@ impl Assembler
                    asm.csel_ge(opnd0, opnd1);
                },
                Insn::IncrCounter { mem, value } => {
-                    // We'll use LDADD later which only works with registers
-                    // ... Load pointer into register
-                    let counter_addr = split_lea_operand(asm, mem);
-
-                    // Load immediates into a register
-                    let addend = match value {
-                        opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd),
-                        opnd => opnd,
+                    let counter_addr = match mem {
+                        Opnd::Mem(_) => split_lea_operand(asm, mem),
+                        _ => mem
                    };

-                    asm.incr_counter(counter_addr, addend);
+                    asm.incr_counter(counter_addr, value);
                },
                Insn::JmpOpnd(opnd) => {
                    if let Opnd::Mem(_) = opnd {

@@ -936,7 +932,21 @@ impl Assembler
                    emit_conditional_jump::<{Condition::VS}>(cb, *target);
                },
                Insn::IncrCounter { mem, value } => {
-                    ldaddal(cb, value.into(), value.into(), mem.into());
+                    let label = cb.new_label("incr_counter_loop".to_string());
+                    cb.write_label(label);
+
+                    ldaxr(cb, Self::SCRATCH0, mem.into());
+                    add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
+
+                    // The status register that gets used to track whether or
+                    // not the store was successful must be 32 bits. Since we
+                    // store the SCRATCH registers as their 64-bit versions, we
+                    // need to rewrap it here.
+                    let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
+                    stlxr(cb, status, Self::SCRATCH0, mem.into());
+
+                    cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
+                    emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
                },
                Insn::Breakpoint => {
                    brk(cb, A64Opnd::None);