From b8846dd2f8042fc13a0f5ae17e2e2a6f400074dd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 9 Aug 2022 10:27:21 -0400 Subject: [PATCH] Load mem displacement when necessary on AArch64 (https://github.com/Shopify/ruby/pull/382) * LDR instruction for AArch64 * Split loads in arm64_split when memory address displacements do not fit --- yjit/src/asm/arm64/inst/load_literal.rs | 6 +- yjit/src/asm/arm64/inst/load_register.rs | 108 ++++++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + yjit/src/asm/arm64/mod.rs | 46 +++++-- yjit/src/backend/arm64/mod.rs | 152 +++++++++++++++++------ 5 files changed, 261 insertions(+), 53 deletions(-) create mode 100644 yjit/src/asm/arm64/inst/load_register.rs diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs index a49130c3eb..d2a5d57eea 100644 --- a/yjit/src/asm/arm64/inst/load_literal.rs +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -39,7 +39,7 @@ pub struct LoadLiteral { impl LoadLiteral { /// LDR (load literal) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en - pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self { + pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self { Self { rt, imm19, opc: num_bits.into() } } } @@ -75,14 +75,14 @@ mod tests { #[test] fn test_ldr_positive() { - let inst = LoadLiteral::ldr(0, 5, 64); + let inst = LoadLiteral::ldr_literal(0, 5, 64); let result: u32 = inst.into(); assert_eq!(0x580000a0, result); } #[test] fn test_ldr_negative() { - let inst = LoadLiteral::ldr(0, -5, 64); + let inst = LoadLiteral::ldr_literal(0, -5, 64); let result: u32 = inst.into(); assert_eq!(0x58ffff60, result); } diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs new file mode 100644 index 0000000000..3426b9ba5f --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_register.rs @@ -0,0 +1,108 @@ +/// Whether or not to shift the register. +enum S { + Shift = 1, + NoShift = 0 +} + +/// The option for this instruction. +enum Option { + UXTW = 0b010, + LSL = 0b011, + SXTW = 0b110, + SXTX = 0b111 +} + +/// The size of the operands of this instruction. +enum Size { + Size32 = 0b10, + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 1 1 1 0 | +/// | size. rm.............. option.. S rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadRegister { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// Whether or not to shift the value of the register. + s: S, + + /// The option associated with this instruction that controls the shift. + option: Option, + + /// The number of the offset register. + rm: u8, + + /// The size of the operands. + size: Size +} + +impl LoadRegister { + /// LDR + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en + pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadRegister) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b11 << 21) + | ((inst.rm as u32) << 16) + | ((inst.option as u32) << 13) + | ((inst.s as u32) << 12) + | (0b10 << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadRegister) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr() { + let inst = LoadRegister::ldr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8626820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index 42df2d137a..ab41464013 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -10,6 +10,7 @@ mod conditional; mod data_imm; mod data_reg; mod load_literal; +mod load_register; mod load_store; mod logical_imm; mod logical_reg; @@ -30,6 +31,7 @@ pub use conditional::Conditional; pub use data_imm::DataImm; pub use data_reg::DataReg; pub use load_literal::LoadLiteral; +pub use load_register::LoadRegister; pub use load_store::LoadStore; pub use logical_imm::LogicalImm; pub use logical_reg::LogicalReg; diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index 68be36c256..93b44dba4b 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -374,11 +374,26 @@ pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { cb.write_bytes(&bytes); } +/// LDR - load a memory address into a register with a register offset +pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn, rm) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction.") + }; + + cb.write_bytes(&bytes); +} + /// LDR - load a PC-relative memory address into a register -pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { +pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { let bytes: [u8; 4] = match rt { A64Opnd::Reg(rt) => { - LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into() + LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into() }, _ => panic!("Invalid operand combination to ldr instruction."), }; @@ -386,12 +401,18 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { cb.write_bytes(&bytes); } +/// Whether or not a memory address displacement fits into the maximum number of +/// bits such that it can be used without loading it into a register first. +pub fn mem_disp_fits_bits(disp: i32) -> bool { + imm_fits_bits(disp.into(), 9) +} + /// LDR (post-index) - load a register from memory, update the base pointer after loading it pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -406,7 +427,7 @@ pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -426,7 +447,7 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { }, (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -441,7 +462,7 @@ pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() }, @@ -670,7 +691,7 @@ pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -685,7 +706,7 @@ pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); - assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -700,7 +721,7 @@ pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { let bytes: [u8; 4] = match (rt, rn) { (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); - assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() }, @@ -1024,7 +1045,12 @@ mod tests { #[test] fn test_ldr() { - check_bytes("40010058", |cb| ldr(cb, X0, 10)); + check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12)); + } + + #[test] + fn test_ldr_literal() { + check_bytes("40010058", |cb| ldr_literal(cb, X0, 10)); } #[test] diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index db2a30aec0..196523bf74 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -84,13 +84,66 @@ impl Assembler /// have no memory operands. fn arm64_split(mut self) -> Assembler { + /// When we're attempting to load a memory address into a register, the + /// displacement must fit into the maximum number of bits for an Op::Add + /// immediate. If it doesn't, we have to load the displacement into a + /// register first. + fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(Mem { base, disp, num_bits }) => { + if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { + asm.lea(opnd) + } else { + let disp = asm.load(Opnd::Imm(disp.into())); + let reg = match base { + MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), + MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } + }; + + asm.add(reg, disp) + } + }, + _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") + } + } + + /// When you're storing a register into a memory location or loading a + /// memory location into a register, the displacement from the base + /// register of the memory location must fit into 9 bits. If it doesn't, + /// then we need to load that memory address into a register first. + fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if mem_disp_fits_bits(mem.disp) { + opnd + } else { + let base = split_lea_operand(asm, opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only split memory addresses.") + } + } + + /// Any memory operands you're sending into an Op::Load instruction need + /// to be split in case their displacement doesn't fit into 9 bits. + fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(_) => { + let split_opnd = split_memory_address(asm, opnd); + asm.load(split_opnd) + }, + _ => asm.load(opnd) + } + } + /// Operands that take the place of bitmask immediates must follow a /// certain encoding. In this function we ensure that those operands /// do follow that encoding, and if they don't then we load them first. fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - Opnd::Mem(_) => asm.load(opnd), + Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => { if imm <= 0 { asm.load(opnd) @@ -116,7 +169,8 @@ impl Assembler fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnd), + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(_) => asm.load(opnd), Opnd::UImm(uimm) => { if ShiftedImmediate::try_from(uimm).is_ok() { opnd @@ -128,24 +182,6 @@ impl Assembler } } - /// When you're storing a register into a memory location, the - /// displacement from the base register of the memory location must fit - /// into 9 bits. If it doesn't, then we need to load that memory address - /// into a register first. - fn split_store(asm: &mut Assembler, opnd: Opnd) -> Opnd { - match opnd { - Opnd::Mem(mem) => { - if imm_fits_bits(mem.disp.into(), 9) { - opnd - } else { - let base = asm.lea(opnd); - Opnd::mem(64, base, 0) - } - }, - _ => unreachable!("Can only store memory addresses.") - } - } - self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| { // Load all Value operands into registers that aren't already a part // of Load instructions. @@ -172,7 +208,7 @@ impl Assembler asm.add(reg_opnd, opnd1); }, _ => { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_shifted_immediate(asm, opnds[1]); asm.add(opnd0, opnd1); } @@ -189,7 +225,7 @@ impl Assembler asm.push_insn(op, vec![reg_opnd, opnd1], target, text, pos_marker); }, _ => { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); let opnd1 = split_bitmask_immediate(asm, opnds[1]); asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); } @@ -204,7 +240,7 @@ impl Assembler // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { - let value = asm.load(opnd); + let value = split_load_operand(asm, opnd); asm.mov(C_ARG_OPNDS[idx], value); } @@ -215,16 +251,15 @@ impl Assembler Op::Cmp => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; let opnd1 = split_shifted_immediate(asm, opnds[1]); - - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.cmp(opnd0, opnd1); }, Op::CRet => { if opnds[0] != Opnd::Reg(C_RET_REG) { - let value = asm.load(opnds[0]); + let value = split_load_operand(asm, opnds[0]); asm.mov(C_RET_OPND, value); } asm.cret(C_RET_OPND); @@ -234,7 +269,7 @@ impl Assembler let new_opnds = opnds.into_iter().map(|opnd| { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, - _ => asm.load(opnd) + _ => split_load_operand(asm, opnd) } }).collect(); @@ -243,7 +278,7 @@ impl Assembler Op::IncrCounter => { // We'll use LDADD later which only works with registers // ... Load pointer into register - let counter_addr = asm.lea(opnds[0]); + let counter_addr = split_lea_operand(asm, opnds[0]); // Load immediates into a register let addend = match opnds[1] { @@ -255,12 +290,15 @@ impl Assembler }, Op::JmpOpnd => { if let Opnd::Mem(_) = opnds[0] { - let opnd0 = asm.load(opnds[0]); + let opnd0 = split_load_operand(asm, opnds[0]); asm.jmp_opnd(opnd0); } else { asm.jmp_opnd(opnds[0]); } }, + Op::Load => { + split_load_operand(asm, opnds[0]); + }, Op::LoadSExt => { match opnds[0] { // We only want to sign extend if the operand is a @@ -295,7 +333,7 @@ impl Assembler // we'll use the normal mov instruction. match opnds[0] { Opnd::Mem(_) => { - let opnd0 = split_store(asm, opnds[0]); + let opnd0 = split_memory_address(asm, opnds[0]); asm.store(opnd0, value); }, Opnd::Reg(_) => { @@ -308,7 +346,7 @@ impl Assembler // The value that is being negated must be in a register, so // if we get anything else we need to load it first. let opnd0 = match opnds[0] { - Opnd::Mem(_) => asm.load(opnds[0]), + Opnd::Mem(_) => split_load_operand(asm, opnds[0]), _ => opnds[0] }; @@ -318,13 +356,13 @@ impl Assembler // The displacement for the STUR instruction can't be more // than 9 bits long. If it's longer, we need to load the // memory address into a register first. - let opnd0 = split_store(asm, opnds[0]); + let opnd0 = split_memory_address(asm, opnds[0]); // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match opnds[1] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1], - _ => asm.load(opnds[1]) + _ => split_load_operand(asm, opnds[1]) }; asm.store(opnd0, opnd1); @@ -332,19 +370,18 @@ impl Assembler Op::Sub => { let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; let opnd1 = split_shifted_immediate(asm, opnds[1]); - - asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker); + asm.sub(opnd0, opnd1); }, Op::Test => { // The value being tested must be in a register, so if it's // not already one we'll load it first. let opnd0 = match opnds[0] { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0], - _ => asm.load(opnds[0]) + _ => split_load_operand(asm, opnds[0]) }; // The second value must be either a register or an @@ -352,7 +389,6 @@ impl Assembler // immediate. If it's not one of those, we'll need to load // it first. let opnd1 = split_bitmask_immediate(asm, opnds[1]); - asm.test(opnd0, opnd1); }, _ => { @@ -611,7 +647,7 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. - ldr(cb, insn.out.into(), 2); + ldr_literal(cb, insn.out.into(), 2); b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); cb.write_bytes(&value.as_u64().to_le_bytes()); @@ -901,6 +937,42 @@ mod tests { asm.compile_with_num_regs(&mut cb, 1); } + #[test] + fn test_emit_load_mem_disp_fits_into_load() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 0)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that two instructions were written: LDUR and STUR. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: ADD, LDUR, and STUR. + assert_eq!(12, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_does_not_fit_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: MOVZ, ADD, LDUR, and STUR. + assert_eq!(16, cb.get_write_pos()); + } + #[test] fn test_emit_or() { let (mut asm, mut cb) = setup_asm();