YJIT: fix ARM64 bitmask encoding for 32 bit registers (#6503)

For logical instructions such as AND, there is a constraint that the N part of the bitmask immediate must be 0. We weren't respecting this condition previously and were silently emitting undefined instructions. Check for this condition in the assembler and tweak the backend to correctly detect whether a number could be encoded as an immediate in a 32 bit logical instruction. Due to the nature of the immediate encoding, the same numeric value encodes differently depending on the size of the register the instruction works on. We currently don't have cases where we use 32 bit immediates but we ran into this encoding issue during development.
2022-10-06 18:41:38 -04:00 · 2022-10-06 18:41:38 -04:00 · 43e87c7e8a
--- a/yjit/src/asm/arm64/arg/bitmask_imm.rs
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@ -72,13 +72,31 @@ impl TryFrom<u64> for BitmaskImmediate {
    }
 }

-impl From<BitmaskImmediate> for u32 {
+impl BitmaskImmediate {
+    /// Attempt to make a BitmaskImmediate for a 32 bit register.
+    /// The result has N==0, which is required for some 32-bit instructions.
+    /// Note that the exact same BitmaskImmediate produces different values
+    /// depending on the size of the target register.
+    pub fn new_32b_reg(value: u32) -> Result<Self, ()> {
+        // The same bit pattern replicated to u64
+        let value = value as u64;
+        let replicated: u64 = (value << 32) | value;
+        let converted = Self::try_from(replicated);
+        if let Ok(ref imm) = converted {
+            assert_eq!(0, imm.n);
+        }
+
+        converted
+    }
+}
+
+impl BitmaskImmediate {
    /// Encode a bitmask immediate into a 32-bit value.
-    fn from(bitmask: BitmaskImmediate) -> Self {
+    pub fn encode(self) -> u32 {
        0
-        | ((bitmask.n as u32) << 12)
-        | ((bitmask.immr as u32) << 6)
-        | (bitmask.imms as u32)
+        | ((self.n as u32) << 12)
+        | ((self.immr as u32) << 6)
+        | (self.imms as u32)
    }
 }

@ -96,7 +114,7 @@ mod tests {
    #[test]
    fn test_negative() {
        let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap();
-        let encoded: u32 = bitmask.into();
+        let encoded: u32 = bitmask.encode();
        assert_eq!(7998, encoded);
    }

@ -207,4 +225,31 @@ mod tests {
        let bitmask = BitmaskImmediate::try_from(u64::MAX);
        assert!(matches!(bitmask, Err(())));
    }
+
+    #[test]
+    fn test_all_valid_32b_pattern() {
+        let mut patterns = vec![];
+        for pattern_size in [2, 4, 8, 16, 32_u64] {
+            for ones_count in 1..pattern_size {
+                for rotation in 0..pattern_size {
+                    let ones = (1_u64 << ones_count) - 1;
+                    let rotated = (ones >> rotation) |
+                        ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation));
+                    let mut replicated = rotated;
+                    let mut shift = pattern_size;
+                    while shift < 32 {
+                        replicated |= replicated << shift;
+                        shift *= 2;
+                    }
+                    let replicated: u32 = replicated.try_into().unwrap();
+                    assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok());
+                    patterns.push(replicated);
+                }
+            }
+        }
+        patterns.sort();
+        patterns.dedup();
+        // Up to {size}-1 ones, and a total of {size} possible rotations.
+        assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len());
+    }
 }
--- a/yjit/src/asm/arm64/inst/logical_imm.rs
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@ -86,7 +86,7 @@ const FAMILY: u32 = 0b1001;
 impl From<LogicalImm> for u32 {
    /// Convert an instruction into a 32-bit value.
    fn from(inst: LogicalImm) -> Self {
-        let imm: u32 = inst.imm.into();
+        let imm: u32 = inst.imm.encode();

        0
        | ((inst.sf as u32) << 31)
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@ -135,8 +135,13 @@ pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
        },
        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();

-            LogicalImm::and(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::and(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
        },
        _ => panic!("Invalid operand combination to and instruction."),
    };
@ -157,8 +162,13 @@ pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
        },
        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();

-            LogicalImm::ands(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::ands(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
        },
        _ => panic!("Invalid operand combination to ands instruction."),
    };
@ -305,8 +315,13 @@ pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
        },
        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();

-            LogicalImm::eor(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::eor(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
        },
        _ => panic!("Invalid operand combination to eor instruction."),
    };
@ -604,7 +619,13 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
            LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
        },
        (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
-            LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
+
+            LogicalImm::mov(rd.reg_no, bitmask_imm, rd.num_bits).into()
        },
        _ => panic!("Invalid operand combination to mov instruction")
    };
@ -712,8 +733,13 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
        },
        (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
            assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+            let bitmask_imm = if rd.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();

-            LogicalImm::orr(rd.reg_no, rn.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
+            LogicalImm::orr(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
        },
        _ => panic!("Invalid operand combination to orr instruction."),
    };
@ -995,7 +1021,13 @@ pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
            LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into()
        },
        (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
-            LogicalImm::tst(rn.reg_no, imm.try_into().unwrap(), rn.num_bits).into()
+            let bitmask_imm = if rn.num_bits == 32 {
+                BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+            } else {
+                imm.try_into()
+            }.unwrap();
+
+            LogicalImm::tst(rn.reg_no, bitmask_imm, rn.num_bits).into()
        },
        _ => panic!("Invalid operand combination to tst instruction."),
    };
@ -1097,6 +1129,11 @@ mod tests {
        check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7)));
    }

+    #[test]
+    fn test_and_32b_immedaite() {
+        check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff)));
+    }
+
    #[test]
    fn test_ands_register() {
        check_bytes("200002ea", |cb| ands(cb, X0, X1, X2));
@ -1207,6 +1244,11 @@ mod tests {
        check_bytes("6a0940d2", |cb| eor(cb, X10, X11, A64Opnd::new_uimm(7)));
    }

+    #[test]
+    fn test_eor_32b_immediate() {
+        check_bytes("29040152", |cb| eor(cb, W9, W1, A64Opnd::new_uimm(0x80000001)));
+    }
+
    #[test]
    fn test_ldaddal() {
        check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
@ -1302,6 +1344,10 @@ mod tests {
        check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555)));
    }

+    #[test]
+    fn test_mov_32b_immediate() {
+        check_bytes("ea070132", |cb| mov(cb, W10, A64Opnd::new_uimm(0x80000001)));
+    }
    #[test]
    fn test_mov_into_sp() {
        check_bytes("1f000091", |cb| mov(cb, X31, X0));
@ -1357,6 +1403,11 @@ mod tests {
        check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7)));
    }

+    #[test]
+    fn test_orr_32b_immediate() {
+        check_bytes("6a010032", |cb| orr(cb, W10, W11, A64Opnd::new_uimm(1)));
+    }
+
    #[test]
    fn test_ret_none() {
        check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None));
@ -1481,4 +1532,9 @@ mod tests {
    fn test_tst_immediate() {
        check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7)));
    }
+
+    #[test]
+    fn test_tst_32b_immediate() {
+        check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff)));
+    }
 }
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@ -157,24 +157,31 @@ impl Assembler
        /// Operands that take the place of bitmask immediates must follow a
        /// certain encoding. In this function we ensure that those operands
        /// do follow that encoding, and if they don't then we load them first.
-        fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+        fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
            match opnd {
                Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
                Opnd::Mem(_) => split_load_operand(asm, opnd),
                Opnd::Imm(imm) => {
                    if imm <= 0 {
                        asm.load(opnd)
-                    } else if BitmaskImmediate::try_from(imm as u64).is_ok() {
+                    } else if (dest_num_bits == 64 &&
+                                BitmaskImmediate::try_from(imm as u64).is_ok()) ||
+                            (dest_num_bits == 32 &&
+                                u32::try_from(imm).is_ok() &&
+                                BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) {
                        Opnd::UImm(imm as u64)
                    } else {
-                        asm.load(opnd)
+                        asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
                    }
                },
                Opnd::UImm(uimm) => {
-                    if BitmaskImmediate::try_from(uimm).is_ok() {
+                    if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) ||
+                        (dest_num_bits == 32 &&
+                            u32::try_from(uimm).is_ok() &&
+                            BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) {
                        opnd
                    } else {
-                        asm.load(opnd)
+                        asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
                    }
                },
                Opnd::None | Opnd::Value(_) => unreachable!()
@ -208,12 +215,12 @@ impl Assembler
                },
                (reg_opnd @ Opnd::Reg(_), other_opnd) |
                (other_opnd, reg_opnd @ Opnd::Reg(_)) => {
-                    let opnd1 = split_bitmask_immediate(asm, other_opnd);
+                    let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits());
                    (reg_opnd, opnd1)
                },
                _ => {
                    let opnd0 = split_load_operand(asm, opnd0);
-                    let opnd1 = split_bitmask_immediate(asm, opnd1);
+                    let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits());
                    (opnd0, opnd1)
                }
            }
@ -449,7 +456,7 @@ impl Assembler
                        // register or an immediate that can be encoded as a
                        // bitmask immediate. Otherwise, we'll need to split the
                        // move into multiple instructions.
-                        _ => split_bitmask_immediate(asm, src)
+                        _ => split_bitmask_immediate(asm, src, dest.rm_num_bits())
                    };

                    // If we're attempting to load into a memory operand, then
@ -508,7 +515,7 @@ impl Assembler
                    // unsigned immediate that can be encoded as a bitmask
                    // immediate. If it's not one of those, we'll need to load
                    // it first.
-                    let opnd1 = split_bitmask_immediate(asm, right);
+                    let opnd1 = split_bitmask_immediate(asm, right, opnd0.rm_num_bits());
                    asm.test(opnd0, opnd1);
                },
                _ => {
@ -1186,6 +1193,24 @@ mod tests {
        assert_eq!(20, cb.get_write_pos());
    }

+    #[test]
+    fn test_emit_test_32b_reg_not_bitmask_imm() {
+        let (mut asm, mut cb) = setup_asm();
+        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+        asm.test(w0, Opnd::UImm(u32::MAX.into()));
+        // All ones is not encodable with a bitmask immediate,
+        // so this needs one register
+        asm.compile_with_num_regs(&mut cb, 1);
+    }
+
+    #[test]
+    fn test_emit_test_32b_reg_bitmask_imm() {
+        let (mut asm, mut cb) = setup_asm();
+        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+        asm.test(w0, Opnd::UImm(0x80000001));
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
    #[test]
    fn test_emit_or() {
        let (mut asm, mut cb) = setup_asm();