* LDR instruction for AArch64

* Split loads in arm64_split when memory address displacements do not fit
This commit is contained in:
Kevin Newton 2022-08-09 10:27:21 -04:00 committed by Takashi Kokubun
Parent 85d6d76e41
Commit b8846dd2f8
No known key found for this signature
GPG key ID: 6FFC433B12EE23DD
5 changed files with 261 additions and 53 deletions

View file

@ -39,7 +39,7 @@ pub struct LoadLiteral {
impl LoadLiteral {
/// LDR (load literal)
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
pub fn ldr(rt: u8, imm19: i32, num_bits: u8) -> Self {
pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self {
Self { rt, imm19, opc: num_bits.into() }
}
}
@ -75,14 +75,14 @@ mod tests {
#[test]
fn test_ldr_positive() {
let inst = LoadLiteral::ldr(0, 5, 64);
let inst = LoadLiteral::ldr_literal(0, 5, 64);
let result: u32 = inst.into();
assert_eq!(0x580000a0, result);
}
#[test]
fn test_ldr_negative() {
let inst = LoadLiteral::ldr(0, -5, 64);
let inst = LoadLiteral::ldr_literal(0, -5, 64);
let result: u32 = inst.into();
assert_eq!(0x58ffff60, result);
}

View file

@ -0,0 +1,108 @@
/// Whether or not to shift the register.
///
/// The discriminant is the raw S bit of the instruction; it is shifted
/// into bit 12 of the encoding by `From<LoadRegister> for u32` below.
enum S {
    Shift = 1,
    NoShift = 0
}
/// The option for this instruction.
///
/// The discriminant is the raw 3-bit "option" field; it is shifted into
/// bits 15-13 of the encoding by `From<LoadRegister> for u32` below.
///
/// NOTE(review): this enum shadows core's `Option` inside this module.
/// That works here because no code in this file uses the standard
/// `Option`, but a rename (e.g. `ExtendOption`) would be safer — verify
/// before adding code that needs `core::option::Option` to this module.
enum Option {
    UXTW = 0b010,
    LSL = 0b011,
    SXTW = 0b110,
    SXTX = 0b111
}
/// The size of the operands of this instruction.
///
/// The discriminant is the raw 2-bit "size" field; it is shifted into
/// bits 31-30 of the encoding by `From<LoadRegister> for u32` below.
enum Size {
    Size32 = 0b10,
    Size64 = 0b11
}
/// A convenience conversion so the number of bits of a register operand
/// maps directly onto a Size enum variant.
///
/// Panics if given anything other than 32 or 64.
impl From<u8> for Size {
    fn from(num_bits: u8) -> Self {
        if num_bits == 64 {
            Size::Size64
        } else if num_bits == 32 {
            Size::Size32
        } else {
            panic!("Invalid number of bits: {}", num_bits)
        }
    }
}
/// The struct that represents an A64 load instruction that can be encoded.
///
/// LDR (register) bit layout, as assembled by `From<LoadRegister> for u32`:
///
/// | 31-30 | 29-24  | 23-22 | 21 | 20-16 | 15-13  | 12 | 11-10 | 09-05 | 04-00 |
/// | size  | 111000 | 01    | 1  | rm    | option | S  | 10    | rn    | rt    |
///
pub struct LoadRegister {
    /// The number of the register to load the value into.
    rt: u8,

    /// The base register with which to form the address.
    rn: u8,

    /// Whether or not to shift the value of the register.
    s: S,

    /// The option associated with this instruction that controls the shift.
    option: Option,

    /// The number of the offset register.
    rm: u8,

    /// The size of the operands (32- or 64-bit).
    size: Size
}
impl LoadRegister {
/// LDR
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en
pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() }
}
}
/// The op0 field shared by the loads-and-stores encoding group; OR'd into
/// bits 28-25 of the instruction.
/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
const FAMILY: u32 = 0b0100;
impl From<LoadRegister> for u32 {
    /// Convert an instruction into a 32-bit value.
    fn from(inst: LoadRegister) -> Self {
        // Variable fields, each shifted to its bit position.
        let size = (inst.size as u32) << 30;
        let rm = (inst.rm as u32) << 16;
        let option = (inst.option as u32) << 13;
        let s = (inst.s as u32) << 12;
        let rn = (inst.rn as u32) << 5;
        let rt = inst.rt as u32;

        // Fixed opcode bits for LDR (register).
        let fixed = (0b11 << 28) | (FAMILY << 25) | (0b11 << 21) | (0b10 << 10);

        fixed | size | rm | option | s | rn | rt
    }
}
impl From<LoadRegister> for [u8; 4] {
    /// Convert an instruction into its 4 bytes, little-endian.
    fn from(inst: LoadRegister) -> [u8; 4] {
        u32::from(inst).to_le_bytes()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ldr() {
        // rt = X0, base rn = X1, offset rm = X2, 64-bit operands.
        let inst = LoadRegister::ldr(0, 1, 2, 64);
        let result: u32 = inst.into();
        assert_eq!(0xf8626820, result);
    }
}

View file

@ -10,6 +10,7 @@ mod conditional;
mod data_imm;
mod data_reg;
mod load_literal;
mod load_register;
mod load_store;
mod logical_imm;
mod logical_reg;
@ -30,6 +31,7 @@ pub use conditional::Conditional;
pub use data_imm::DataImm;
pub use data_reg::DataReg;
pub use load_literal::LoadLiteral;
pub use load_register::LoadRegister;
pub use load_store::LoadStore;
pub use logical_imm::LogicalImm;
pub use logical_reg::LogicalReg;

View file

@ -374,11 +374,26 @@ pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
cb.write_bytes(&bytes);
}
/// LDR - load a memory address into a register with a register offset
pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
    let bytes: [u8; 4] = match (rt, rn, rm) {
        // Only the all-register form is encodable here; the asserts
        // enforce that destination, base, and offset share one width.
        (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
            assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
            assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size");

            LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into()
        },
        _ => panic!("Invalid operand combination to ldr instruction.")
    };

    cb.write_bytes(&bytes);
}
/// LDR - load a PC-relative memory address into a register
pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
let bytes: [u8; 4] = match rt {
A64Opnd::Reg(rt) => {
LoadLiteral::ldr(rt.reg_no, rn, rt.num_bits).into()
LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into()
},
_ => panic!("Invalid operand combination to ldr instruction."),
};
@ -386,12 +401,18 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
cb.write_bytes(&bytes);
}
/// Whether or not a memory address displacement fits into the maximum number of
/// bits such that it can be used without loading it into a register first.
///
/// 9 bits is the signed-displacement limit shared by the unscaled and
/// pre/post-index load/store emitters in this file (ldur, stur, ldr_pre,
/// ldr_post, str_pre, str_post, ldursw), which all assert this same bound.
pub fn mem_disp_fits_bits(disp: i32) -> bool {
    imm_fits_bits(disp.into(), 9)
}
/// LDR (post-index) - load a register from memory, update the base pointer after loading it
pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -406,7 +427,7 @@ pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -426,7 +447,7 @@ pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
},
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less");
assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -441,7 +462,7 @@ pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less");
assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
},
@ -670,7 +691,7 @@ pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -685,7 +706,7 @@ pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -700,7 +721,7 @@ pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
let bytes: [u8; 4] = match (rt, rn) {
(A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
assert!(imm_fits_bits(rn.disp.into(), 9), "Expected displacement to be 9 bits or less");
assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
},
@ -1024,7 +1045,12 @@ mod tests {
#[test]
fn test_ldr() {
check_bytes("40010058", |cb| ldr(cb, X0, 10));
check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12));
}
#[test]
fn test_ldr_literal() {
check_bytes("40010058", |cb| ldr_literal(cb, X0, 10));
}
#[test]

View file

@ -84,13 +84,66 @@ impl Assembler
/// have no memory operands.
fn arm64_split(mut self) -> Assembler
{
/// When we're attempting to load a memory address into a register, the
/// displacement must fit into the maximum number of bits for an Op::Add
/// immediate. If it doesn't, we have to load the displacement into a
/// register first.
///
/// Returns the operand that holds the computed address: either the Lea
/// output directly, or the output of an Add of base + displacement.
fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
    match opnd {
        Opnd::Mem(Mem { base, disp, num_bits }) => {
            // A non-negative displacement that encodes as a shifted
            // immediate can go straight into the Lea instruction.
            if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() {
                asm.lea(opnd)
            } else {
                // Otherwise materialize the displacement in a register
                // and compute base + disp explicitly.
                let disp = asm.load(Opnd::Imm(disp.into()));
                let reg = match base {
                    MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }),
                    MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits }
                };

                asm.add(reg, disp)
            }
        },
        _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.")
    }
}
/// When you're storing a register into a memory location or loading a
/// memory location into a register, the displacement from the base
/// register of the memory location must fit into 9 bits. If it doesn't,
/// then we need to load that memory address into a register first.
fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd {
    match opnd {
        Opnd::Mem(mem) => {
            if mem_disp_fits_bits(mem.disp) {
                // Displacement is in range; use the operand as-is.
                opnd
            } else {
                // Compute the full address into a register, then
                // reference it with a zero displacement (always fits).
                let base = split_lea_operand(asm, opnd);
                Opnd::mem(64, base, 0)
            }
        },
        _ => unreachable!("Can only split memory addresses.")
    }
}
/// Any memory operands you're sending into an Op::Load instruction need
/// to be split in case their displacement doesn't fit into 9 bits.
fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
    if matches!(opnd, Opnd::Mem(_)) {
        // Rewrite the address first so its displacement is in range.
        let split_opnd = split_memory_address(asm, opnd);
        asm.load(split_opnd)
    } else {
        asm.load(opnd)
    }
}
/// Operands that take the place of bitmask immediates must follow a
/// certain encoding. In this function we ensure that those operands
/// do follow that encoding, and if they don't then we load them first.
fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
match opnd {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
Opnd::Mem(_) => asm.load(opnd),
Opnd::Mem(_) => split_load_operand(asm, opnd),
Opnd::Imm(imm) => {
if imm <= 0 {
asm.load(opnd)
@ -116,7 +169,8 @@ impl Assembler
fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
match opnd {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
Opnd::Mem(_) | Opnd::Imm(_) => asm.load(opnd),
Opnd::Mem(_) => split_load_operand(asm, opnd),
Opnd::Imm(_) => asm.load(opnd),
Opnd::UImm(uimm) => {
if ShiftedImmediate::try_from(uimm).is_ok() {
opnd
@ -128,24 +182,6 @@ impl Assembler
}
}
/// When you're storing a register into a memory location, the
/// displacement from the base register of the memory location must fit
/// into 9 bits. If it doesn't, then we need to load that memory address
/// into a register first.
fn split_store(asm: &mut Assembler, opnd: Opnd) -> Opnd {
    match opnd {
        Opnd::Mem(mem) => {
            if imm_fits_bits(mem.disp.into(), 9) {
                // Displacement is in range; use the operand as-is.
                opnd
            } else {
                // Compute the full address into a register, then
                // reference it with a zero displacement (always fits).
                let base = asm.lea(opnd);
                Opnd::mem(64, base, 0)
            }
        },
        _ => unreachable!("Can only store memory addresses.")
    }
}
self.forward_pass(|asm, index, op, opnds, target, text, pos_marker, original_opnds| {
// Load all Value operands into registers that aren't already a part
// of Load instructions.
@ -172,7 +208,7 @@ impl Assembler
asm.add(reg_opnd, opnd1);
},
_ => {
let opnd0 = asm.load(opnds[0]);
let opnd0 = split_load_operand(asm, opnds[0]);
let opnd1 = split_shifted_immediate(asm, opnds[1]);
asm.add(opnd0, opnd1);
}
@ -189,7 +225,7 @@ impl Assembler
asm.push_insn(op, vec![reg_opnd, opnd1], target, text, pos_marker);
},
_ => {
let opnd0 = asm.load(opnds[0]);
let opnd0 = split_load_operand(asm, opnds[0]);
let opnd1 = split_bitmask_immediate(asm, opnds[1]);
asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker);
}
@ -204,7 +240,7 @@ impl Assembler
// Note: the iteration order is reversed to avoid corrupting x0,
// which is both the return value and first argument register
for (idx, opnd) in opnds.into_iter().enumerate().rev() {
let value = asm.load(opnd);
let value = split_load_operand(asm, opnd);
asm.mov(C_ARG_OPNDS[idx], value);
}
@ -215,16 +251,15 @@ impl Assembler
Op::Cmp => {
let opnd0 = match opnds[0] {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0],
_ => asm.load(opnds[0])
_ => split_load_operand(asm, opnds[0])
};
let opnd1 = split_shifted_immediate(asm, opnds[1]);
asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker);
asm.cmp(opnd0, opnd1);
},
Op::CRet => {
if opnds[0] != Opnd::Reg(C_RET_REG) {
let value = asm.load(opnds[0]);
let value = split_load_operand(asm, opnds[0]);
asm.mov(C_RET_OPND, value);
}
asm.cret(C_RET_OPND);
@ -234,7 +269,7 @@ impl Assembler
let new_opnds = opnds.into_iter().map(|opnd| {
match opnd {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
_ => asm.load(opnd)
_ => split_load_operand(asm, opnd)
}
}).collect();
@ -243,7 +278,7 @@ impl Assembler
Op::IncrCounter => {
// We'll use LDADD later which only works with registers
// ... Load pointer into register
let counter_addr = asm.lea(opnds[0]);
let counter_addr = split_lea_operand(asm, opnds[0]);
// Load immediates into a register
let addend = match opnds[1] {
@ -255,12 +290,15 @@ impl Assembler
},
Op::JmpOpnd => {
if let Opnd::Mem(_) = opnds[0] {
let opnd0 = asm.load(opnds[0]);
let opnd0 = split_load_operand(asm, opnds[0]);
asm.jmp_opnd(opnd0);
} else {
asm.jmp_opnd(opnds[0]);
}
},
Op::Load => {
split_load_operand(asm, opnds[0]);
},
Op::LoadSExt => {
match opnds[0] {
// We only want to sign extend if the operand is a
@ -295,7 +333,7 @@ impl Assembler
// we'll use the normal mov instruction.
match opnds[0] {
Opnd::Mem(_) => {
let opnd0 = split_store(asm, opnds[0]);
let opnd0 = split_memory_address(asm, opnds[0]);
asm.store(opnd0, value);
},
Opnd::Reg(_) => {
@ -308,7 +346,7 @@ impl Assembler
// The value that is being negated must be in a register, so
// if we get anything else we need to load it first.
let opnd0 = match opnds[0] {
Opnd::Mem(_) => asm.load(opnds[0]),
Opnd::Mem(_) => split_load_operand(asm, opnds[0]),
_ => opnds[0]
};
@ -318,13 +356,13 @@ impl Assembler
// The displacement for the STUR instruction can't be more
// than 9 bits long. If it's longer, we need to load the
// memory address into a register first.
let opnd0 = split_store(asm, opnds[0]);
let opnd0 = split_memory_address(asm, opnds[0]);
// The value being stored must be in a register, so if it's
// not already one we'll load it first.
let opnd1 = match opnds[1] {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[1],
_ => asm.load(opnds[1])
_ => split_load_operand(asm, opnds[1])
};
asm.store(opnd0, opnd1);
@ -332,19 +370,18 @@ impl Assembler
Op::Sub => {
let opnd0 = match opnds[0] {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0],
_ => asm.load(opnds[0])
_ => split_load_operand(asm, opnds[0])
};
let opnd1 = split_shifted_immediate(asm, opnds[1]);
asm.push_insn(op, vec![opnd0, opnd1], target, text, pos_marker);
asm.sub(opnd0, opnd1);
},
Op::Test => {
// The value being tested must be in a register, so if it's
// not already one we'll load it first.
let opnd0 = match opnds[0] {
Opnd::Reg(_) | Opnd::InsnOut { .. } => opnds[0],
_ => asm.load(opnds[0])
_ => split_load_operand(asm, opnds[0])
};
// The second value must be either a register or an
@ -352,7 +389,6 @@ impl Assembler
// immediate. If it's not one of those, we'll need to load
// it first.
let opnd1 = split_bitmask_immediate(asm, opnds[1]);
asm.test(opnd0, opnd1);
},
_ => {
@ -611,7 +647,7 @@ impl Assembler
// references to GC'd Value operands. If the value
// being loaded is a heap object, we'll report that
// back out to the gc_offsets list.
ldr(cb, insn.out.into(), 2);
ldr_literal(cb, insn.out.into(), 2);
b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4));
cb.write_bytes(&value.as_u64().to_le_bytes());
@ -901,6 +937,42 @@ mod tests {
asm.compile_with_num_regs(&mut cb, 1);
}
#[test]
fn test_emit_load_mem_disp_fits_into_load() {
    let (mut asm, mut cb) = setup_asm();

    // Displacement 0 fits into 9 bits, so no splitting is required.
    let opnd = asm.load(Opnd::mem(64, SP, 0));
    asm.store(Opnd::mem(64, SP, 0), opnd);
    asm.compile_with_num_regs(&mut cb, 1);

    // Assert that two instructions were written: LDUR and STUR.
    assert_eq!(8, cb.get_write_pos());
}
#[test]
fn test_emit_load_mem_disp_fits_into_add() {
    let (mut asm, mut cb) = setup_asm();

    // 1 << 10 overflows the 9-bit displacement but still encodes as an
    // ADD immediate, so one ADD computes the address first.
    let opnd = asm.load(Opnd::mem(64, SP, 1 << 10));
    asm.store(Opnd::mem(64, SP, 0), opnd);
    asm.compile_with_num_regs(&mut cb, 1);

    // Assert that three instructions were written: ADD, LDUR, and STUR.
    assert_eq!(12, cb.get_write_pos());
}
#[test]
fn test_emit_load_mem_disp_does_not_fit_into_add() {
    let (mut asm, mut cb) = setup_asm();

    // (1 << 12) | 1 fits neither the 9-bit displacement nor an ADD
    // immediate, so the displacement is materialized with MOVZ first.
    let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1));
    asm.store(Opnd::mem(64, SP, 0), opnd);
    asm.compile_with_num_regs(&mut cb, 1);

    // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
    assert_eq!(16, cb.get_write_pos());
}
#[test]
fn test_emit_or() {
let (mut asm, mut cb) = setup_asm();