Bug 1664453: vendor Cranelift to 379aed8092cd1241ec7839e77d05557b1dceb234 to resolve two Wasm translation bugs. r=jseward

This Cranelift update to revision 379aed8092cd1241ec7839e77d05557b1dceb234
includes its PRs #2197 and #2194, which fix two Wasm translation bugs, as well as
other miscellaneous updates and fixes.

Fixes both Bug 1664453 and Bug 1663861.

Differential Revision: https://phabricator.services.mozilla.com/D90306
This commit is contained in:
Chris Fallin 2020-09-15 20:01:26 +00:00
Родитель d027c5b8d9
Коммит 0931312b73
38 изменённых файлов: 2531 добавлений и 1935 удалений

Просмотреть файл

@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "a7f7c23bf9c37c642da962d575b7c99007918872"
rev = "379aed8092cd1241ec7839e77d05557b1dceb234"
[source."https://github.com/badboy/failure"]
git = "https://github.com/badboy/failure"

14
Cargo.lock сгенерированный
Просмотреть файл

@ -762,7 +762,7 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
dependencies = [
"cranelift-entity 0.66.0",
]
@ -770,7 +770,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
dependencies = [
"byteorder",
"cranelift-bforest",
@ -787,7 +787,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen-meta"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.66.0",
@ -796,7 +796,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen-shared"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
[[package]]
name = "cranelift-entity"
@ -806,12 +806,12 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=477d8fc53a6
[[package]]
name = "cranelift-entity"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
[[package]]
name = "cranelift-frontend"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
dependencies = [
"cranelift-codegen",
"log",
@ -822,7 +822,7 @@ dependencies = [
[[package]]
name = "cranelift-wasm"
version = "0.66.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=a7f7c23bf9c37c642da962d575b7c99007918872#a7f7c23bf9c37c642da962d575b7c99007918872"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=379aed8092cd1241ec7839e77d05557b1dceb234#379aed8092cd1241ec7839e77d05557b1dceb234"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.66.0",

Просмотреть файл

@ -75,8 +75,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f
[patch.crates-io.cranelift-codegen]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "a7f7c23bf9c37c642da962d575b7c99007918872"
rev = "379aed8092cd1241ec7839e77d05557b1dceb234"
[patch.crates-io.cranelift-wasm]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "a7f7c23bf9c37c642da962d575b7c99007918872"
rev = "379aed8092cd1241ec7839e77d05557b1dceb234"

0
third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs поставляемый Executable file → Normal file
Просмотреть файл

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -29,6 +29,7 @@ peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.66.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.66.0" }
regalloc = "0.0.30"
souper-ir = { version = "1", optional = true }
wast = { version = "22.0.0", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
@ -87,5 +88,8 @@ rebuild-peephole-optimizers = ["peepmatic", "peepmatic-traits", "wast"]
# Enable the use of `peepmatic`-generated peephole optimizers.
enable-peepmatic = ["peepmatic-runtime", "peepmatic-traits", "serde"]
# Enable support for the Souper harvester.
souper-harvest = ["souper-ir", "souper-ir/stringify"]
[badges]
maintenance = { status = "experimental" }

Просмотреть файл

@ -36,9 +36,14 @@ use crate::timing;
use crate::unreachable_code::eliminate_unreachable_code;
use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges};
use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
#[cfg(feature = "souper-harvest")]
use alloc::string::String;
use alloc::vec::Vec;
use log::debug;
#[cfg(feature = "souper-harvest")]
use crate::souper_harvest::do_souper_harvest;
/// Persistent data structures and compilation pipeline.
pub struct Context {
/// The function we're compiling.
@ -447,4 +452,14 @@ impl Context {
isa,
))
}
/// Harvest candidate left-hand sides for superoptimization with Souper.
#[cfg(feature = "souper-harvest")]
pub fn souper_harvest(
&mut self,
out: &mut std::sync::mpsc::Sender<String>,
) -> CodegenResult<()> {
do_souper_harvest(&self.func, out);
Ok(())
}
}

Просмотреть файл

@ -18,6 +18,7 @@ use crate::isa;
use crate::bitset::BitSet;
use crate::entity;
use ir::condcodes::{FloatCC, IntCC};
/// Some instructions use an external list of argument values because there is not enough space in
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
@ -295,6 +296,33 @@ impl InstructionData {
}
}
/// If this is a control-flow instruction depending on an integer condition, gets its
/// condition. Otherwise, return `None`.
pub fn cond_code(&self) -> Option<IntCC> {
match self {
&InstructionData::IntCond { cond, .. }
| &InstructionData::BranchIcmp { cond, .. }
| &InstructionData::IntCompare { cond, .. }
| &InstructionData::IntCondTrap { cond, .. }
| &InstructionData::BranchInt { cond, .. }
| &InstructionData::IntSelect { cond, .. }
| &InstructionData::IntCompareImm { cond, .. } => Some(cond),
_ => None,
}
}
/// If this is a control-flow instruction depending on a floating-point condition, gets its
/// condition. Otherwise, return `None`.
pub fn fp_cond_code(&self) -> Option<FloatCC> {
match self {
&InstructionData::BranchFloat { cond, .. }
| &InstructionData::FloatCompare { cond, .. }
| &InstructionData::FloatCond { cond, .. }
| &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
_ => None,
}
}
/// If this is a trapping instruction, get an exclusive reference to its
/// trap code. Otherwise, return `None`.
pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> {
@ -307,6 +335,27 @@ impl InstructionData {
}
}
/// If this is an atomic read/modify/write instruction, return its subopcode.
pub fn atomic_rmw_op(&self) -> Option<ir::AtomicRmwOp> {
match self {
&InstructionData::AtomicRmw { op, .. } => Some(op),
_ => None,
}
}
/// If this is a load/store instruction, returns its immediate offset.
pub fn load_store_offset(&self) -> Option<i32> {
match self {
&InstructionData::Load { offset, .. }
| &InstructionData::StackLoad { offset, .. }
| &InstructionData::LoadComplex { offset, .. }
| &InstructionData::Store { offset, .. }
| &InstructionData::StackStore { offset, .. }
| &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
_ => None,
}
}
/// Return information about a call instruction.
///
/// Any instruction that can call another function reveals its call signature here.

Просмотреть файл

@ -13,16 +13,15 @@ use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use std::convert::TryFrom;
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.
/// Support for the AArch64 ABI from the callee side (within a function body).
pub type AArch64ABIBody = ABIBodyImpl<AArch64MachineImpl>;
pub(crate) type AArch64ABICallee = ABICalleeImpl<AArch64MachineDeps>;
/// Support for the AArch64 ABI from the caller side (at a callsite).
pub type AArch64ABICall = ABICallImpl<AArch64MachineImpl>;
pub(crate) type AArch64ABICaller = ABICallerImpl<AArch64MachineDeps>;
// Spidermonkey specific ABI convention.
@ -105,9 +104,9 @@ impl Into<AMode> for StackAMode {
/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct AArch64MachineImpl;
pub(crate) struct AArch64MachineDeps;
impl ABIMachineImpl for AArch64MachineImpl {
impl ABIMachineSpec for AArch64MachineDeps {
type I = Inst;
fn compute_arg_locs(
@ -285,7 +284,8 @@ impl ABIMachineImpl for AArch64MachineImpl {
Inst::Ret
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u64) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
let imm = imm as u64;
let mut insts = SmallVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm) {
insts.push(Inst::AluRRImm12 {
@ -296,6 +296,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
});
} else {
let scratch2 = writable_tmp2_reg();
assert_ne!(scratch2.to_reg(), from_reg);
insts.extend(Inst::load_constant(scratch2, imm.into()));
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
@ -310,11 +311,12 @@ impl ABIMachineImpl for AArch64MachineImpl {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
let mut insts = SmallVec::new();
insts.push(Inst::AluRRR {
alu_op: ALUOp::SubS64XR,
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: stack_reg(),
rm: limit_reg,
extendop: ExtendOp::UXTX,
});
insts.push(Inst::TrapIf {
trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
@ -334,29 +336,29 @@ impl ABIMachineImpl for AArch64MachineImpl {
Inst::LoadAddr { rd: into_reg, mem }
}
fn get_fixed_tmp_reg() -> Reg {
fn get_stacklimit_reg() -> Reg {
spilltmp_reg()
}
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i64, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset, ty);
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_load(into_reg, mem, ty)
}
fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset, ty);
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
let mem = AMode::RegOffset(base, offset as i64, ty);
Inst::gen_store(mem, from_reg, ty)
}
fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
if amount == 0 {
return SmallVec::new();
}
let (amount, is_sub) = if amount > 0 {
(u64::try_from(amount).unwrap(), false)
(amount as u64, false)
} else {
(u64::try_from(-amount).unwrap(), true)
(-amount as u64, true)
};
let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
@ -372,10 +374,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
ret.push(adj_inst);
} else {
let tmp = writable_spilltmp_reg();
let const_inst = Inst::LoadConst64 {
rd: tmp,
const_data: amount,
};
let const_inst = Inst::load_constant(tmp, amount);
let adj_inst = Inst::AluRRRExtend {
alu_op,
rd: writable_stack_reg(),
@ -383,14 +382,16 @@ impl ABIMachineImpl for AArch64MachineImpl {
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
ret.push(const_inst);
ret.extend(const_inst);
ret.push(adj_inst);
}
ret
}
fn gen_nominal_sp_adj(offset: i64) -> Inst {
Inst::VirtualSPOffsetAdj { offset }
fn gen_nominal_sp_adj(offset: i32) -> Inst {
Inst::VirtualSPOffsetAdj {
offset: offset as i64,
}
}
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
@ -553,11 +554,12 @@ impl ABIMachineImpl for AArch64MachineImpl {
defs: Vec<Writable<Reg>>,
loc: SourceLoc,
opcode: ir::Opcode,
) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> {
tmp: Writable<Reg>,
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
let mut insts = SmallVec::new();
match &dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
true,
InstIsSafepoint::Yes,
Inst::Call {
info: Box::new(CallInfo {
dest: name.clone(),
@ -570,19 +572,19 @@ impl ABIMachineImpl for AArch64MachineImpl {
)),
&CallDest::ExtName(ref name, RelocDistance::Far) => {
insts.push((
false,
InstIsSafepoint::No,
Inst::LoadExtName {
rd: writable_spilltmp_reg(),
rd: tmp,
name: Box::new(name.clone()),
offset: 0,
srcloc: loc,
},
));
insts.push((
true,
InstIsSafepoint::Yes,
Inst::CallInd {
info: Box::new(CallIndInfo {
rn: spilltmp_reg(),
rn: tmp.to_reg(),
uses,
defs,
loc,
@ -592,7 +594,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
));
}
&CallDest::Reg(reg) => insts.push((
true,
InstIsSafepoint::Yes,
Inst::CallInd {
info: Box::new(CallIndInfo {
rn: *reg,
@ -608,7 +610,7 @@ impl ABIMachineImpl for AArch64MachineImpl {
insts
}
fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
@ -698,9 +700,10 @@ fn get_callee_saves(
}
}
}
// Sort registers for deterministic code output.
int_saves.sort_by_key(|r| r.to_reg().get_index());
vec_saves.sort_by_key(|r| r.to_reg().get_index());
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
int_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
(int_saves, vec_saves)
}

Просмотреть файл

@ -575,7 +575,7 @@ impl ScalarSize {
32 => ScalarSize::Size32,
64 => ScalarSize::Size64,
128 => ScalarSize::Size128,
_ => panic!("Unexpected type width"),
w => panic!("Unexpected type width: {}", w),
}
}
@ -591,7 +591,7 @@ impl ScalarSize {
ScalarSize::Size16 => 0b11,
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => panic!("Unexpected scalar FP operand size"),
_ => panic!("Unexpected scalar FP operand size: {:?}", self),
}
}
}
@ -612,6 +612,7 @@ impl VectorSize {
/// Convert from a type into a vector operand size.
pub fn from_ty(ty: Type) -> VectorSize {
match ty {
B32X4 => VectorSize::Size32x4,
F32X2 => VectorSize::Size32x2,
F32X4 => VectorSize::Size32x4,
F64X2 => VectorSize::Size64x2,
@ -622,7 +623,7 @@ impl VectorSize {
I32X2 => VectorSize::Size32x2,
I32X4 => VectorSize::Size32x4,
I64X2 => VectorSize::Size64x2,
_ => unimplemented!(),
_ => unimplemented!("Unsupported type: {}", ty),
}
}

Просмотреть файл

@ -170,7 +170,7 @@ fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
}
}
const MOVE_WIDE_FIXED: u32 = 0x92800000;
const MOVE_WIDE_FIXED: u32 = 0x12800000;
#[repr(u32)]
enum MoveWideOpcode {
@ -179,9 +179,15 @@ enum MoveWideOpcode {
MOVK = 0b11,
}
fn enc_move_wide(op: MoveWideOpcode, rd: Writable<Reg>, imm: MoveWideConst) -> u32 {
fn enc_move_wide(
op: MoveWideOpcode,
rd: Writable<Reg>,
imm: MoveWideConst,
size: OperandSize,
) -> u32 {
assert!(imm.shift <= 0b11);
MOVE_WIDE_FIXED
| size.sf_bit() << 31
| (op as u32) << 29
| u32::from(imm.shift) << 21
| u32::from(imm.bits) << 5
@ -434,7 +440,7 @@ pub struct EmitState {
}
impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &dyn ABIBody<I = Inst>) -> Self {
fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
EmitState {
virtual_sp_offset: 0,
nominal_sp_to_fp: abi.frame_size() as i64,
@ -491,21 +497,12 @@ impl MachInstEmit for Inst {
ALUOp::AddS64 => 0b10101011_000,
ALUOp::SubS32 => 0b01101011_000,
ALUOp::SubS64 => 0b11101011_000,
ALUOp::SubS64XR => 0b11101011_001,
ALUOp::SDiv64 => 0b10011010_110,
ALUOp::UDiv64 => 0b10011010_110,
ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
ALUOp::MAdd32
| ALUOp::MAdd64
| ALUOp::MSub32
| ALUOp::MSub64
| ALUOp::SMulH
| ALUOp::UMulH => {
//// RRRR ops.
panic!("Bad ALUOp {:?} in RRR form!", alu_op);
}
ALUOp::SMulH => 0b10011011_010,
ALUOp::UMulH => 0b10011011_110,
};
let bit15_10 = match alu_op {
ALUOp::SDiv64 => 0b000011,
@ -514,16 +511,13 @@ impl MachInstEmit for Inst {
ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
ALUOp::SubS64XR => 0b011000,
ALUOp::SMulH | ALUOp::UMulH => 0b011111,
_ => 0b000000,
};
debug_assert_ne!(writable_stack_reg(), rd);
// The stack pointer is the zero register if this instruction
// doesn't have access to extended registers, so this might be
// an indication that something is wrong.
if alu_op != ALUOp::SubS64XR {
debug_assert_ne!(stack_reg(), rn);
}
// The stack pointer is the zero register in this context, so this might be an
// indication that something is wrong.
debug_assert_ne!(stack_reg(), rn);
debug_assert_ne!(stack_reg(), rm);
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
}
@ -535,13 +529,10 @@ impl MachInstEmit for Inst {
ra,
} => {
let (top11, bit15) = match alu_op {
ALUOp::MAdd32 => (0b0_00_11011_000, 0),
ALUOp::MSub32 => (0b0_00_11011_000, 1),
ALUOp::MAdd64 => (0b1_00_11011_000, 0),
ALUOp::MSub64 => (0b1_00_11011_000, 1),
ALUOp::SMulH => (0b1_00_11011_010, 0),
ALUOp::UMulH => (0b1_00_11011_110, 0),
_ => unimplemented!("{:?}", alu_op),
ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
ALUOp3::MSub32 => (0b0_00_11011_000, 1),
ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
ALUOp3::MSub64 => (0b1_00_11011_000, 1),
};
sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
}
@ -999,7 +990,7 @@ impl MachInstEmit for Inst {
}
}
}
&Inst::Mov { rd, rm } => {
&Inst::Mov64 { rd, rm } => {
assert!(rd.to_reg().get_class() == rm.get_class());
assert!(rm.get_class() == RegClass::I64);
@ -1029,9 +1020,15 @@ impl MachInstEmit for Inst {
// Encoded as ORR rd, rm, zero.
sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
}
&Inst::MovZ { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm)),
&Inst::MovN { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm)),
&Inst::MovK { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm)),
&Inst::MovZ { rd, imm, size } => {
sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
}
&Inst::MovN { rd, imm, size } => {
sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
}
&Inst::MovK { rd, imm, size } => {
sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
}
&Inst::CSel { rd, rn, rm, cond } => {
sink.put4(enc_csel(rd, rn, rm, cond));
}
@ -2077,19 +2074,6 @@ impl MachInstEmit for Inst {
// disable the worst-case-size check in this case.
start_off = sink.cur_offset();
}
&Inst::LoadConst64 { rd, const_data } => {
let inst = Inst::ULoad64 {
rd,
mem: AMode::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags, state);
sink.put8(const_data);
}
&Inst::LoadExtName {
rd,
ref name,

Просмотреть файл

@ -777,19 +777,20 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::SubS64XR,
Inst::AluRRRExtend {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: stack_reg(),
rm: xreg(12),
extendop: ExtendOp::UXTX,
},
"FF632CEB",
"subs xzr, sp, x12",
"subs xzr, sp, x12, UXTX",
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp::MAdd32,
alu_op: ALUOp3::MAdd32,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
@ -800,7 +801,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp::MAdd64,
alu_op: ALUOp3::MAdd64,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
@ -811,7 +812,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp::MSub32,
alu_op: ALUOp3::MSub32,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
@ -822,7 +823,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp::MSub64,
alu_op: ALUOp3::MSub64,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
@ -832,23 +833,21 @@ fn test_aarch64_binemit() {
"msub x1, x2, x3, x4",
));
insns.push((
Inst::AluRRRR {
Inst::AluRRR {
alu_op: ALUOp::SMulH,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
ra: zero_reg(),
},
"417C439B",
"smulh x1, x2, x3",
));
insns.push((
Inst::AluRRRR {
Inst::AluRRR {
alu_op: ALUOp::UMulH,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
ra: zero_reg(),
},
"417CC39B",
"umulh x1, x2, x3",
@ -1370,8 +1369,8 @@ fn test_aarch64_binemit() {
mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1
srcloc: None,
},
"300080D21002A0F2B063308B010240F9",
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
"300080521002A072B063308B010240F9",
"movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
@ -1654,7 +1653,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::Mov {
Inst::Mov64 {
rd: writable_xreg(8),
rm: xreg(9),
},
@ -1674,6 +1673,7 @@ fn test_aarch64_binemit() {
Inst::MovZ {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
size: OperandSize::Size64,
},
"E8FF9FD2",
"movz x8, #65535",
@ -1682,6 +1682,7 @@ fn test_aarch64_binemit() {
Inst::MovZ {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFBFD2",
"movz x8, #65535, LSL #16",
@ -1690,6 +1691,7 @@ fn test_aarch64_binemit() {
Inst::MovZ {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFDFD2",
"movz x8, #65535, LSL #32",
@ -1698,15 +1700,26 @@ fn test_aarch64_binemit() {
Inst::MovZ {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFFFD2",
"movz x8, #65535, LSL #48",
));
insns.push((
Inst::MovZ {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
size: OperandSize::Size32,
},
"E8FFBF52",
"movz w8, #65535, LSL #16",
));
insns.push((
Inst::MovN {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
size: OperandSize::Size64,
},
"E8FF9F92",
"movn x8, #65535",
@ -1715,6 +1728,7 @@ fn test_aarch64_binemit() {
Inst::MovN {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFBF92",
"movn x8, #65535, LSL #16",
@ -1723,6 +1737,7 @@ fn test_aarch64_binemit() {
Inst::MovN {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFDF92",
"movn x8, #65535, LSL #32",
@ -1731,15 +1746,26 @@ fn test_aarch64_binemit() {
Inst::MovN {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFFF92",
"movn x8, #65535, LSL #48",
));
insns.push((
Inst::MovN {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
size: OperandSize::Size32,
},
"E8FF9F12",
"movn w8, #65535",
));
insns.push((
Inst::MovK {
rd: writable_xreg(12),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"0C0080F2",
"movk x12, #0",
@ -1748,6 +1774,7 @@ fn test_aarch64_binemit() {
Inst::MovK {
rd: writable_xreg(19),
imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(),
size: OperandSize::Size64,
},
"1300A0F2",
"movk x19, #0, LSL #16",
@ -1756,6 +1783,7 @@ fn test_aarch64_binemit() {
Inst::MovK {
rd: writable_xreg(3),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
size: OperandSize::Size64,
},
"E3FF9FF2",
"movk x3, #65535",
@ -1764,6 +1792,7 @@ fn test_aarch64_binemit() {
Inst::MovK {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFBFF2",
"movk x8, #65535, LSL #16",
@ -1772,6 +1801,7 @@ fn test_aarch64_binemit() {
Inst::MovK {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFDFF2",
"movk x8, #65535, LSL #32",
@ -1780,6 +1810,7 @@ fn test_aarch64_binemit() {
Inst::MovK {
rd: writable_xreg(8),
imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
size: OperandSize::Size64,
},
"E8FFFFF2",
"movk x8, #65535, LSL #48",

Просмотреть файл

@ -45,15 +45,11 @@ pub enum ALUOp {
Sub64,
Orr32,
Orr64,
/// NOR
OrrNot32,
/// NOR
OrrNot64,
And32,
And64,
/// NAND
AndNot32,
/// NAND
AndNot64,
/// XOR (AArch64 calls this "EOR")
Eor32,
@ -71,16 +67,6 @@ pub enum ALUOp {
SubS32,
/// Sub, setting flags
SubS64,
/// Sub, setting flags, using extended registers
SubS64XR,
/// Multiply-add
MAdd32,
/// Multiply-add
MAdd64,
/// Multiply-sub
MSub32,
/// Multiply-sub
MSub64,
/// Signed multiply, high-word result
SMulH,
/// Unsigned multiply, high-word result
@ -97,6 +83,19 @@ pub enum ALUOp {
Lsl64,
}
/// An ALU operation with three arguments.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ALUOp3 {
/// Multiply-add
MAdd32,
/// Multiply-add
MAdd64,
/// Multiply-sub
MSub32,
/// Multiply-sub
MSub64,
}
/// A floating-point unit (FPU) operation with one arg.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FPUOp1 {
@ -433,7 +432,7 @@ pub enum Inst {
},
/// An ALU operation with three register sources and a register destination.
AluRRRR {
alu_op: ALUOp,
alu_op: ALUOp3,
rd: Writable<Reg>,
rn: Reg,
rm: Reg,
@ -571,7 +570,7 @@ pub enum Inst {
/// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
/// keep them separate at the `Inst` level for better pretty-printing
/// and faster `is_move()` logic.
Mov {
Mov64 {
rd: Writable<Reg>,
rm: Reg,
},
@ -587,18 +586,21 @@ pub enum Inst {
MovZ {
rd: Writable<Reg>,
imm: MoveWideConst,
size: OperandSize,
},
/// A MOVN with a 16-bit immediate.
MovN {
rd: Writable<Reg>,
imm: MoveWideConst,
size: OperandSize,
},
/// A MOVK with a 16-bit immediate.
MovK {
rd: Writable<Reg>,
imm: MoveWideConst,
size: OperandSize,
},
/// A sign- or zero-extend operation.
@ -1070,12 +1072,6 @@ pub enum Inst {
rtmp2: Writable<Reg>,
},
/// Load an inline constant.
LoadConst64 {
rd: Writable<Reg>,
const_data: u64,
},
/// Load an inline symbol reference.
LoadExtName {
rd: Writable<Reg>,
@ -1122,9 +1118,9 @@ pub enum Inst {
},
}
fn count_zero_half_words(mut value: u64) -> usize {
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
let mut count = 0;
for _ in 0..4 {
for _ in 0..num_half_words {
if value & 0xffff == 0 {
count += 1;
}
@ -1146,7 +1142,7 @@ impl Inst {
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
assert!(to_reg.to_reg().get_class() == from_reg.get_class());
if from_reg.get_class() == RegClass::I64 {
Inst::Mov {
Inst::Mov64 {
rd: to_reg,
rm: from_reg,
}
@ -1176,10 +1172,18 @@ impl Inst {
pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
// 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
smallvec![Inst::MovZ { rd, imm }]
smallvec![Inst::MovZ {
rd,
imm,
size: OperandSize::Size64
}]
} else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
// 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
smallvec![Inst::MovN { rd, imm }]
smallvec![Inst::MovN {
rd,
imm,
size: OperandSize::Size64
}]
} else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
// Weird logical-instruction immediate in ORI using zero register
smallvec![Inst::AluRRImmLogic {
@ -1191,15 +1195,22 @@ impl Inst {
} else {
let mut insts = smallvec![];
// If the top 32 bits are zero, use 32-bit `mov` operations.
let (num_half_words, size, negated) = if value >> 32 == 0 {
(2, OperandSize::Size32, (!value << 32) >> 32)
} else {
(4, OperandSize::Size64, !value)
};
// If the number of 0xffff half words is greater than the number of 0x0000 half words
// it is more efficient to use `movn` for the first instruction.
let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
let first_is_inverted = count_zero_half_words(negated, num_half_words)
> count_zero_half_words(value, num_half_words);
// Either 0xffff or 0x0000 half words can be skipped, depending on the first
// instruction used.
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
let mut first_mov_emitted = false;
for i in 0..4 {
for i in 0..num_half_words {
let imm16 = (value >> (16 * i)) & 0xffff;
if imm16 != ignored_halfword {
if !first_mov_emitted {
@ -1208,15 +1219,15 @@ impl Inst {
let imm =
MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
.unwrap();
insts.push(Inst::MovN { rd, imm });
insts.push(Inst::MovN { rd, imm, size });
} else {
let imm =
MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
insts.push(Inst::MovZ { rd, imm });
insts.push(Inst::MovZ { rd, imm, size });
}
} else {
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
insts.push(Inst::MovK { rd, imm });
insts.push(Inst::MovK { rd, imm, size });
}
}
}
@ -1286,7 +1297,22 @@ impl Inst {
mem,
srcloc: None,
},
_ => unimplemented!("gen_load({})", ty),
_ => {
if ty.is_vector() {
let bits = ty_bits(ty);
let rd = into_reg;
let srcloc = None;
if bits == 128 {
Inst::FpuLoad128 { rd, mem, srcloc }
} else {
assert_eq!(bits, 64);
Inst::FpuLoad64 { rd, mem, srcloc }
}
} else {
unimplemented!("gen_load({})", ty);
}
}
}
}
@ -1323,7 +1349,22 @@ impl Inst {
mem,
srcloc: None,
},
_ => unimplemented!("gen_store({})", ty),
_ => {
if ty.is_vector() {
let bits = ty_bits(ty);
let rd = from_reg;
let srcloc = None;
if bits == 128 {
Inst::FpuStore128 { rd, mem, srcloc }
} else {
assert_eq!(bits, 64);
Inst::FpuStore64 { rd, mem, srcloc }
}
} else {
unimplemented!("gen_store({})", ty);
}
}
}
}
}
@ -1440,7 +1481,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::Mov { rd, rm } => {
&Inst::Mov64 { rd, rm } => {
collector.add_def(rd);
collector.add_use(rm);
}
@ -1713,7 +1754,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rtmp1);
collector.add_def(rtmp2);
}
&Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => {
&Inst::LoadExtName { rd, .. } => {
collector.add_def(rd);
}
&Inst::LoadAddr { rd, mem: _ } => {
@ -1973,7 +2014,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::Mov {
&mut Inst::Mov64 {
ref mut rd,
ref mut rm,
} => {
@ -2404,9 +2445,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rtmp1);
map_def(mapper, rtmp2);
}
&mut Inst::LoadConst64 { ref mut rd, .. } => {
map_def(mapper, rd);
}
&mut Inst::LoadExtName { ref mut rd, .. } => {
map_def(mapper, rd);
}
@ -2438,7 +2476,7 @@ impl MachInst for Inst {
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
match self {
&Inst::Mov { rd, rm } => Some((rd, rm)),
&Inst::Mov64 { rd, rm } => Some((rd, rm)),
&Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
&Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
_ => None,
@ -2609,11 +2647,6 @@ impl Inst {
ALUOp::AddS64 => ("adds", OperandSize::Size64),
ALUOp::SubS32 => ("subs", OperandSize::Size32),
ALUOp::SubS64 => ("subs", OperandSize::Size64),
ALUOp::SubS64XR => ("subs", OperandSize::Size64),
ALUOp::MAdd32 => ("madd", OperandSize::Size32),
ALUOp::MAdd64 => ("madd", OperandSize::Size64),
ALUOp::MSub32 => ("msub", OperandSize::Size32),
ALUOp::MSub64 => ("msub", OperandSize::Size64),
ALUOp::SMulH => ("smulh", OperandSize::Size64),
ALUOp::UMulH => ("umulh", OperandSize::Size64),
ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
@ -2652,19 +2685,18 @@ impl Inst {
rm,
ra,
} => {
let (op, size) = op_name_size(alu_op);
let four_args = alu_op != ALUOp::SMulH && alu_op != ALUOp::UMulH;
let (op, size) = match alu_op {
ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
ALUOp3::MSub32 => ("msub", OperandSize::Size32),
ALUOp3::MSub64 => ("msub", OperandSize::Size64),
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
let rn = show_ireg_sized(rn, mb_rru, size);
let rm = show_ireg_sized(rm, mb_rru, size);
let ra = show_ireg_sized(ra, mb_rru, size);
if four_args {
format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
} else {
// smulh and umulh have Ra "hard-wired" to the zero register
// and the canonical assembly form has only three regs.
format!("{} {}, {}, {}", op, rd, rn, rm)
}
format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
}
&Inst::AluRRImm12 {
alu_op,
@ -2860,7 +2892,7 @@ impl Inst {
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::Mov { rd, rm } => {
&Inst::Mov64 { rd, rm } => {
let rd = rd.to_reg().show_rru(mb_rru);
let rm = rm.show_rru(mb_rru);
format!("mov {}, {}", rd, rm)
@ -2870,18 +2902,18 @@ impl Inst {
let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
format!("mov {}, {}", rd, rm)
}
&Inst::MovZ { rd, ref imm } => {
let rd = rd.to_reg().show_rru(mb_rru);
&Inst::MovZ { rd, ref imm, size } => {
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
let imm = imm.show_rru(mb_rru);
format!("movz {}, {}", rd, imm)
}
&Inst::MovN { rd, ref imm } => {
let rd = rd.to_reg().show_rru(mb_rru);
&Inst::MovN { rd, ref imm, size } => {
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
let imm = imm.show_rru(mb_rru);
format!("movn {}, {}", rd, imm)
}
&Inst::MovK { rd, ref imm } => {
let rd = rd.to_reg().show_rru(mb_rru);
&Inst::MovK { rd, ref imm, size } => {
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
let imm = imm.show_rru(mb_rru);
format!("movk {}, {}", rd, imm)
}
@ -3517,10 +3549,6 @@ impl Inst {
info.targets
)
}
&Inst::LoadConst64 { rd, const_data } => {
let rd = rd.show_rru(mb_rru);
format!("ldr {}, 8 ; b 12 ; data {:?}", rd, const_data)
}
&Inst::LoadExtName {
rd,
ref name,

Просмотреть файл

@ -7,11 +7,10 @@
//!
//! - Floating-point immediates (FIMM instruction).
use crate::ir;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::ir::{InstructionData, Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
@ -106,26 +105,6 @@ pub(crate) enum ResultRegImmShift {
ImmShift(ImmShift),
}
//============================================================================
// Instruction input "slots".
//
// We use these types to refer to operand numbers, and result numbers, together
// with the associated instruction, in a type-safe way.
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
pub(crate) insn: IRInst,
pub(crate) input: usize,
}
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
//============================================================================
// Lowering: convert instruction inputs to forms that we can use.
@ -191,11 +170,6 @@ impl NarrowValueMode {
}
}
/// Allocate a register for an instruction output and return it.
pub(crate) fn get_output_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.get_output(out.insn, out.output)
}
/// Lower an instruction input to a reg.
///
/// The given register will be extended appropriately, according to
@ -211,12 +185,12 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
let from_bits = ty_bits(ty) as u8;
let inputs = ctx.get_input(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let masked = if from_bits < 64 {
c & ((1u64 << from_bits) - 1)
} else {
c
};
// Generate constants fresh at each use to minimize long-range register pressure.
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
@ -1023,58 +997,6 @@ pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
}
}
pub(crate) fn ldst_offset(data: &InstructionData) -> Option<i32> {
match data {
&InstructionData::Load { offset, .. }
| &InstructionData::StackLoad { offset, .. }
| &InstructionData::LoadComplex { offset, .. }
| &InstructionData::Store { offset, .. }
| &InstructionData::StackStore { offset, .. }
| &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
_ => None,
}
}
pub(crate) fn inst_condcode(data: &InstructionData) -> Option<IntCC> {
match data {
&InstructionData::IntCond { cond, .. }
| &InstructionData::BranchIcmp { cond, .. }
| &InstructionData::IntCompare { cond, .. }
| &InstructionData::IntCondTrap { cond, .. }
| &InstructionData::BranchInt { cond, .. }
| &InstructionData::IntSelect { cond, .. }
| &InstructionData::IntCompareImm { cond, .. } => Some(cond),
_ => None,
}
}
pub(crate) fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
match data {
&InstructionData::BranchFloat { cond, .. }
| &InstructionData::FloatCompare { cond, .. }
| &InstructionData::FloatCond { cond, .. }
| &InstructionData::FloatCondTrap { cond, .. } => Some(cond),
_ => None,
}
}
pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
match data {
&InstructionData::Trap { code, .. }
| &InstructionData::CondTrap { code, .. }
| &InstructionData::IntCondTrap { code, .. }
| &InstructionData::FloatCondTrap { code, .. } => Some(code),
_ => None,
}
}
pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option<ir::AtomicRmwOp> {
match data {
&InstructionData::AtomicRmw { op, .. } => Some(op),
_ => None,
}
}
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
c: &mut C,

Просмотреть файл

@ -50,6 +50,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let value = ctx.get_constant(insn).unwrap();
// Sign extend constant if necessary
let value = match ty.unwrap() {
I8 => (((value as i64) << 56) >> 56) as u64,
I16 => (((value as i64) << 48) >> 48) as u64,
I32 => (((value as i64) << 32) >> 32) as u64,
I64 | R64 => value,
ty if ty.is_bool() => value,
ty => unreachable!("Unknown type for const: {}", ty),
};
let rd = get_output_reg(ctx, outputs[0]);
lower_constant_u64(ctx, rd, value);
}
@ -203,7 +212,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
ctx.emit(Inst::AluRRRR {
alu_op,
rd,
@ -340,19 +349,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
I64 => {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ra = zero_reg();
let alu_op = if is_signed {
ALUOp::SMulH
} else {
ALUOp::UMulH
};
ctx.emit(Inst::AluRRRR {
alu_op,
rd,
rn,
rm,
ra,
});
ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
}
I32 | I16 | I8 => {
let narrow_mode = if is_signed {
@ -364,7 +366,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
let ra = zero_reg();
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp::MAdd64,
alu_op: ALUOp3::MAdd64,
rd,
rn,
rm,
@ -453,7 +455,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp::MSub64,
alu_op: ALUOp3::MSub64,
rd: rd,
rn: rd.to_reg(),
rm: rm,
@ -1090,7 +1092,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
let off = ldst_offset(ctx.data(insn)).unwrap();
let off = ctx.data(insn).load_store_offset().unwrap();
let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
I8
@ -1175,7 +1177,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Istore8Complex
| Opcode::Istore16Complex
| Opcode::Istore32Complex => {
let off = ldst_offset(ctx.data(insn)).unwrap();
let off = ctx.data(insn).load_store_offset().unwrap();
let elem_ty = match op {
Opcode::Istore8 | Opcode::Istore8Complex => I8,
Opcode::Istore16 | Opcode::Istore16Complex => I16,
@ -1245,7 +1247,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
// Now the AtomicRMW insn itself
let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap());
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
ctx.emit(Inst::AtomicRMW {
ty: ty_access,
op,
@ -1364,7 +1366,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cond = if let Some(icmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
{
let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap();
let condcode = ctx.data(icmp_insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed);
@ -1372,7 +1374,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else if let Some(fcmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
{
let condcode = inst_fp_condcode(ctx.data(fcmp_insn)).unwrap();
let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
cond
@ -1411,7 +1413,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Selectif | Opcode::SelectifSpectreGuard => {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
// Verification ensures that the input is always a
@ -1483,7 +1485,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trueif => {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
// Verification ensures that the input is always a
@ -1496,7 +1498,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trueff => {
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let ffcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ffcmp).unwrap();
lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
@ -1686,7 +1688,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Icmp => {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
let rd = get_output_reg(ctx, outputs[0]);
@ -1713,7 +1715,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Fcmp => {
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let ty = ctx.input_ty(insn, 0);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@ -1746,15 +1748,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trap | Opcode::ResumableTrap => {
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
let trap_info = (ctx.srcloc(insn), ctx.data(insn).trap_code().unwrap());
ctx.emit_safepoint(Inst::Udf { trap_info });
}
Opcode::Trapif | Opcode::Trapff => {
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
let trap_info = (ctx.srcloc(insn), ctx.data(insn).trap_code().unwrap());
let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
// The flags must not have been clobbered by any other
// instruction between the iadd_ifcout and this instruction, as
@ -1762,7 +1764,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// flags here.
cond
} else if op == Opcode::Trapif {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
@ -1771,7 +1773,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
cond
} else {
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
let condcode = ctx.data(insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
// Verification ensures that the input is always a
@ -1835,7 +1837,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(
AArch64ABICall::from_func(sig, &extname, dist, loc)?,
AArch64ABICaller::from_func(sig, &extname, dist, loc)?,
&inputs[..],
)
}
@ -1844,7 +1846,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
(AArch64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
}
_ => unreachable!(),
};
@ -2782,7 +2784,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}
Opcode::TlsValue => unimplemented!(),
Opcode::TlsValue => unimplemented!("tls_value"),
}
Ok(())
@ -2824,7 +2826,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
if let Some(icmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
{
let condcode = inst_condcode(ctx.data(icmp_insn)).unwrap();
let condcode = ctx.data(icmp_insn).cond_code().unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
let negated = op0 == Opcode::Brz;
@ -2839,7 +2841,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
} else if let Some(fcmp_insn) =
maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
{
let condcode = inst_fp_condcode(ctx.data(fcmp_insn)).unwrap();
let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let negated = op0 == Opcode::Brz;
let cond = if negated { cond.invert() } else { cond };
@ -2872,7 +2874,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
}
}
Opcode::BrIcmp => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let condcode = ctx.data(branches[0]).cond_code().unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
@ -2913,7 +2915,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
}
Opcode::Brif => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let condcode = ctx.data(branches[0]).cond_code().unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
@ -2943,7 +2945,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
}
Opcode::Brff => {
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
let cond = lower_fp_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let flag_input = InsnInput {

Просмотреть файл

@ -47,7 +47,7 @@ impl AArch64Backend {
func: &Function,
flags: settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> {
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);
compile::compile::<AArch64Backend>(func, self, abi)
}
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -412,6 +412,7 @@ pub enum SseOpcode {
Psubd,
Psubq,
Psubw,
Pxor,
Rcpss,
Roundss,
Roundsd,
@ -512,6 +513,7 @@ impl SseOpcode {
| SseOpcode::Psubd
| SseOpcode::Psubq
| SseOpcode::Psubw
| SseOpcode::Pxor
| SseOpcode::Sqrtpd
| SseOpcode::Sqrtsd
| SseOpcode::Subpd
@ -607,6 +609,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Psubd => "psubd",
SseOpcode::Psubq => "psubq",
SseOpcode::Psubw => "psubw",
SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd",
@ -998,6 +1001,14 @@ pub enum OperandSize {
}
impl OperandSize {
pub(crate) fn from_bytes(num_bytes: u32) -> Self {
match num_bytes {
1 | 2 | 4 => OperandSize::Size32,
8 => OperandSize::Size64,
_ => unreachable!(),
}
}
pub(crate) fn to_bytes(&self) -> u8 {
match self {
Self::Size32 => 4,

Просмотреть файл

@ -628,17 +628,46 @@ pub(crate) fn emit(
}
}
Inst::Not { size, src } => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
let subopcode = 2;
let src = int_reg_enc(src.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
Inst::Neg { size, src } => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
let subopcode = 3;
let src = int_reg_enc(src.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
Inst::Div {
size,
signed,
divisor,
loc,
} => {
let (prefix, rex_flags) = match size {
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
8 => (LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!(),
let (opcode, prefix, rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
sink.add_trap(*loc, TrapCode::IntegerDivisionByZero);
@ -647,12 +676,12 @@ pub(crate) fn emit(
match divisor {
RegMem::Reg { reg } => {
let src = int_reg_enc(*reg);
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
RegMem::Mem { addr: src } => emit_std_enc_mem(
sink,
prefix,
0xF7,
opcode,
1,
subopcode,
&src.finalize(state),
@ -687,15 +716,22 @@ pub(crate) fn emit(
}
}
Inst::SignExtendRaxRdx { size } => {
match size {
2 => sink.put1(0x66),
4 => {}
8 => sink.put1(0x48),
_ => unreachable!(),
Inst::SignExtendData { size } => match size {
1 => {
sink.put1(0x66);
sink.put1(0x98);
}
sink.put1(0x99);
}
2 => {
sink.put1(0x66);
sink.put1(0x99);
}
4 => sink.put1(0x99),
8 => {
sink.put1(0x48);
sink.put1(0x99);
}
_ => unreachable!(),
},
Inst::CheckedDivOrRemSeq {
kind,
@ -755,7 +791,11 @@ pub(crate) fn emit(
// x % -1 = 0; put the result into the destination, $rdx.
let done_label = sink.get_label();
let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx()));
let inst = Inst::imm(
OperandSize::from_bytes(*size as u32),
0,
Writable::from_reg(regs::rdx()),
);
inst.emit(sink, flags, state);
let inst = Inst::jmp_known(BranchTarget::Label(done_label));
@ -767,7 +807,7 @@ pub(crate) fn emit(
if *size == 8 {
let tmp = tmp.expect("temporary for i64 sdiv");
let inst = Inst::imm_r(true, 0x8000000000000000, tmp);
let inst = Inst::imm(OperandSize::Size64, 0x8000000000000000, tmp);
inst.emit(sink, flags, state);
let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
@ -791,14 +831,19 @@ pub(crate) fn emit(
sink.bind_label(do_op);
}
assert!(
*size > 1,
"CheckedDivOrRemSeq for i8 is not yet implemented"
);
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
let inst = Inst::sign_extend_rax_to_rdx(*size);
let inst = Inst::sign_extend_data(*size);
inst.emit(sink, flags, state);
} else {
// zero for unsigned opcodes.
let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx()));
let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
inst.emit(sink, flags, state);
}
@ -813,18 +858,30 @@ pub(crate) fn emit(
}
}
Inst::Imm_R {
Inst::Imm {
dst_is_64,
simm64,
dst,
} => {
let enc_dst = int_reg_enc(dst.to_reg());
if *dst_is_64 {
// FIXME JRS 2020Feb10: also use the 32-bit case here when
// possible
sink.put1(0x48 | ((enc_dst >> 3) & 1));
sink.put1(0xB8 | (enc_dst & 7));
sink.put8(*simm64);
if low32_will_sign_extend_to_64(*simm64) {
// Sign-extended move imm32.
emit_std_enc_enc(
sink,
LegacyPrefixes::None,
0xC7,
1,
/* subopcode */ 0,
enc_dst,
RexFlags::set_w(),
);
sink.put4(*simm64 as u32);
} else {
sink.put1(0x48 | ((enc_dst >> 3) & 1));
sink.put1(0xB8 | (enc_dst & 7));
sink.put8(*simm64);
}
} else {
if ((enc_dst >> 3) & 1) == 1 {
sink.put1(0x41);
@ -1099,7 +1156,7 @@ pub(crate) fn emit(
}
Inst::Shift_R {
is_64,
size,
kind,
num_bits,
dst,
@ -1113,25 +1170,39 @@ pub(crate) fn emit(
ShiftKind::ShiftRightArithmetic => 7,
};
let rex = if *is_64 {
RexFlags::set_w()
} else {
RexFlags::clear_w()
};
match num_bits {
None => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
// SHL/SHR/SAR %cl, reg8 is (REX.W==0) D2 /subopcode
// SHL/SHR/SAR %cl, reg16 is 66 (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex);
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
}
Some(num_bits) => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
// SHL/SHR/SAR $ib, reg8 is (REX.W==0) C0 /subopcode
// SHL/SHR/SAR $ib, reg16 is 66 (REX.W==0) C1 /subopcode
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
// When the shift amount is 1, there's an even shorter encoding, but we don't
// bother with that nicety here.
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex);
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
sink.put1(*num_bits);
}
}
@ -1703,6 +1774,7 @@ pub(crate) fn emit(
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
@ -2019,12 +2091,7 @@ pub(crate) fn emit(
inst.emit(sink, flags, state);
// tmp_gpr1 := src >> 1
let inst = Inst::shift_r(
/*is_64*/ true,
ShiftKind::ShiftRightLogical,
Some(1),
*tmp_gpr1,
);
let inst = Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(1), *tmp_gpr1);
inst.emit(sink, flags, state);
let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);
@ -2172,10 +2239,10 @@ pub(crate) fn emit(
// Otherwise, put INT_MAX.
if *dst_size == OperandSize::Size64 {
let inst = Inst::imm_r(true, 0x7fffffffffffffff, *dst);
let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
inst.emit(sink, flags, state);
} else {
let inst = Inst::imm_r(false, 0x7fffffff, *dst);
let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
inst.emit(sink, flags, state);
}
} else {
@ -2197,7 +2264,7 @@ pub(crate) fn emit(
match *src_size {
OperandSize::Size32 => {
let cst = Ieee32::pow2(output_bits - 1).neg().bits();
let inst = Inst::imm32_r_unchecked(cst as u64, *tmp_gpr);
let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
inst.emit(sink, flags, state);
}
OperandSize::Size64 => {
@ -2209,7 +2276,7 @@ pub(crate) fn emit(
} else {
Ieee64::pow2(output_bits - 1).neg()
};
let inst = Inst::imm_r(true, cst.bits(), *tmp_gpr);
let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
inst.emit(sink, flags, state);
}
}
@ -2311,15 +2378,14 @@ pub(crate) fn emit(
let done = sink.get_label();
if *src_size == OperandSize::Size64 {
let cst = Ieee64::pow2(dst_size.to_bits() - 1).bits();
let inst = Inst::imm_r(true, cst, *tmp_gpr);
inst.emit(sink, flags, state);
let cst = if *src_size == OperandSize::Size64 {
Ieee64::pow2(dst_size.to_bits() - 1).bits()
} else {
let cst = Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64;
let inst = Inst::imm32_r_unchecked(cst, *tmp_gpr);
inst.emit(sink, flags, state);
}
Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64
};
let inst = Inst::imm(*src_size, cst, *tmp_gpr);
inst.emit(sink, flags, state);
let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
@ -2403,8 +2469,8 @@ pub(crate) fn emit(
if *is_saturating {
// The input was "large" (>= 2**(width -1)), so the only way to get an integer
// overflow is because the input was too large: saturate to the max value.
let inst = Inst::imm_r(
true,
let inst = Inst::imm(
OperandSize::Size64,
if *dst_size == OperandSize::Size64 {
u64::max_value()
} else {
@ -2424,7 +2490,7 @@ pub(crate) fn emit(
sink.bind_label(next_is_large);
if *dst_size == OperandSize::Size64 {
let inst = Inst::imm_r(true, 1 << 63, *tmp_gpr);
let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
inst.emit(sink, flags, state);
let inst = Inst::alu_rmi_r(

Просмотреть файл

@ -1242,6 +1242,52 @@ fn test_x64_emit() {
"bsrq %r15, %rax",
));
// ========================================================
// Not
insns.push((
Inst::not(4, Writable::from_reg(regs::rsi())),
"F7D6",
"notl %esi",
));
insns.push((
Inst::not(8, Writable::from_reg(regs::r15())),
"49F7D7",
"notq %r15",
));
insns.push((
Inst::not(4, Writable::from_reg(regs::r14())),
"41F7D6",
"notl %r14d",
));
insns.push((
Inst::not(2, Writable::from_reg(regs::rdi())),
"66F7D7",
"notw %di",
));
// ========================================================
// Neg
insns.push((
Inst::neg(4, Writable::from_reg(regs::rsi())),
"F7DE",
"negl %esi",
));
insns.push((
Inst::neg(8, Writable::from_reg(regs::r15())),
"49F7DF",
"negq %r15",
));
insns.push((
Inst::neg(4, Writable::from_reg(regs::r14())),
"41F7DE",
"negl %r14d",
));
insns.push((
Inst::neg(2, Writable::from_reg(regs::rdi())),
"66F7DF",
"negw %di",
));
// ========================================================
// Div
insns.push((
@ -1308,53 +1354,57 @@ fn test_x64_emit() {
"mul %rdi",
));
// ========================================================
// cbw
insns.push((Inst::sign_extend_data(1), "6698", "cbw"));
// ========================================================
// cdq family: SignExtendRaxRdx
insns.push((Inst::sign_extend_rax_to_rdx(2), "6699", "cwd"));
insns.push((Inst::sign_extend_rax_to_rdx(4), "99", "cdq"));
insns.push((Inst::sign_extend_rax_to_rdx(8), "4899", "cqo"));
insns.push((Inst::sign_extend_data(2), "6699", "cwd"));
insns.push((Inst::sign_extend_data(4), "99", "cdq"));
insns.push((Inst::sign_extend_data(8), "4899", "cqo"));
// ========================================================
// Imm_R
//
insns.push((
Inst::imm_r(false, 1234567, w_r14),
Inst::imm(OperandSize::Size32, 1234567, w_r14),
"41BE87D61200",
"movl $1234567, %r14d",
));
insns.push((
Inst::imm_r(false, -126i64 as u64, w_r14),
Inst::imm(OperandSize::Size32, -126i64 as u64, w_r14),
"41BE82FFFFFF",
"movl $-126, %r14d",
));
insns.push((
Inst::imm_r(true, 1234567898765, w_r14),
Inst::imm(OperandSize::Size64, 1234567898765, w_r14),
"49BE8D26FB711F010000",
"movabsq $1234567898765, %r14",
));
insns.push((
Inst::imm_r(true, -126i64 as u64, w_r14),
"49BE82FFFFFFFFFFFFFF",
Inst::imm(OperandSize::Size64, -126i64 as u64, w_r14),
"49C7C682FFFFFF",
"movabsq $-126, %r14",
));
insns.push((
Inst::imm_r(false, 1234567, w_rcx),
Inst::imm(OperandSize::Size32, 1234567, w_rcx),
"B987D61200",
"movl $1234567, %ecx",
));
insns.push((
Inst::imm_r(false, -126i64 as u64, w_rcx),
Inst::imm(OperandSize::Size32, -126i64 as u64, w_rcx),
"B982FFFFFF",
"movl $-126, %ecx",
));
insns.push((
Inst::imm_r(true, 1234567898765, w_rsi),
Inst::imm(OperandSize::Size64, 1234567898765, w_rsi),
"48BE8D26FB711F010000",
"movabsq $1234567898765, %rsi",
));
insns.push((
Inst::imm_r(true, -126i64 as u64, w_rbx),
"48BB82FFFFFFFFFFFFFF",
Inst::imm(OperandSize::Size64, -126i64 as u64, w_rbx),
"48C7C382FFFFFF",
"movabsq $-126, %rbx",
));
@ -2315,130 +2365,150 @@ fn test_x64_emit() {
// ========================================================
// Shift_R
insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_rdi),
Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_rdi),
"D3E7",
"shll %cl, %edi",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_r12),
Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_r12),
"41D3E4",
"shll %cl, %r12d",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, Some(2), w_r8),
Inst::shift_r(4, ShiftKind::ShiftLeft, Some(2), w_r8),
"41C1E002",
"shll $2, %r8d",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, Some(31), w_r13),
Inst::shift_r(4, ShiftKind::ShiftLeft, Some(31), w_r13),
"41C1E51F",
"shll $31, %r13d",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_r13),
Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_r13),
"49D3E5",
"shlq %cl, %r13",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_rdi),
Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_rdi),
"48D3E7",
"shlq %cl, %rdi",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(2), w_r8),
Inst::shift_r(8, ShiftKind::ShiftLeft, Some(2), w_r8),
"49C1E002",
"shlq $2, %r8",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(3), w_rbx),
Inst::shift_r(8, ShiftKind::ShiftLeft, Some(3), w_rbx),
"48C1E303",
"shlq $3, %rbx",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(63), w_r13),
Inst::shift_r(8, ShiftKind::ShiftLeft, Some(63), w_r13),
"49C1E53F",
"shlq $63, %r13",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, None, w_rdi),
Inst::shift_r(4, ShiftKind::ShiftRightLogical, None, w_rdi),
"D3EF",
"shrl %cl, %edi",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(2), w_r8),
Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(2), w_r8),
"41C1E802",
"shrl $2, %r8d",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(31), w_r13),
Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(31), w_r13),
"41C1ED1F",
"shrl $31, %r13d",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, None, w_rdi),
Inst::shift_r(8, ShiftKind::ShiftRightLogical, None, w_rdi),
"48D3EF",
"shrq %cl, %rdi",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(2), w_r8),
Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(2), w_r8),
"49C1E802",
"shrq $2, %r8",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(63), w_r13),
Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(63), w_r13),
"49C1ED3F",
"shrq $63, %r13",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, None, w_rdi),
Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, None, w_rdi),
"D3FF",
"sarl %cl, %edi",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
"41C1F802",
"sarl $2, %r8d",
));
insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(31), w_r13),
Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(31), w_r13),
"41C1FD1F",
"sarl $31, %r13d",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, None, w_rdi),
Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, None, w_rdi),
"48D3FF",
"sarq %cl, %rdi",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
"49C1F802",
"sarq $2, %r8",
));
insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(63), w_r13),
Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(63), w_r13),
"49C1FD3F",
"sarq $63, %r13",
));
insns.push((
Inst::shift_r(true, ShiftKind::RotateLeft, None, w_r8),
Inst::shift_r(8, ShiftKind::RotateLeft, None, w_r8),
"49D3C0",
"rolq %cl, %r8",
));
insns.push((
Inst::shift_r(false, ShiftKind::RotateLeft, Some(3), w_r9),
Inst::shift_r(4, ShiftKind::RotateLeft, Some(3), w_r9),
"41C1C103",
"roll $3, %r9d",
));
insns.push((
Inst::shift_r(false, ShiftKind::RotateRight, None, w_rsi),
Inst::shift_r(4, ShiftKind::RotateRight, None, w_rsi),
"D3CE",
"rorl %cl, %esi",
));
insns.push((
Inst::shift_r(true, ShiftKind::RotateRight, Some(5), w_r15),
Inst::shift_r(8, ShiftKind::RotateRight, Some(5), w_r15),
"49C1CF05",
"rorq $5, %r15",
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
"D2CE",
"rorb %cl, %sil",
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
"41C0CF05",
"rorb $5, %r15b",
));
insns.push((
Inst::shift_r(2, ShiftKind::RotateRight, None, w_rsi),
"66D3CE",
"rorw %cl, %si",
));
insns.push((
Inst::shift_r(2, ShiftKind::RotateRight, Some(5), w_r15),
"6641C1CF05",
"rorw $5, %r15w",
));
// ========================================================
// CmpRMIR
@ -3083,6 +3153,12 @@ fn test_x64_emit() {
"pmuludq %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
"66410FEFD3",
"pxor %xmm11, %xmm2",
));
// XMM_Mov_R_M: float stores
insns.push((
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None),

Просмотреть файл

@ -1,5 +1,4 @@
//! This module defines x86_64-specific machine instruction types.an explanation of what it's
//! doing.
//! This module defines x86_64-specific machine instruction types.
#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
@ -56,6 +55,18 @@ pub enum Inst {
dst: Writable<Reg>,
},
/// Bitwise not
Not {
size: u8, // 1, 2, 4 or 8
src: Writable<Reg>,
},
/// Integer negation
Neg {
size: u8, // 1, 2, 4 or 8
src: Writable<Reg>,
},
/// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
Div {
size: u8, // 1, 2, 4 or 8
@ -89,13 +100,14 @@ pub enum Inst {
},
/// Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo)
SignExtendRaxRdx {
/// or al into ah: (cbw)
SignExtendData {
size: u8, // 1, 2, 4 or 8
},
/// Constant materialization: (imm32 imm64) reg.
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
Imm_R {
Imm {
dst_is_64: bool,
simm64: u64,
dst: Writable<Reg>,
@ -151,9 +163,9 @@ pub enum Inst {
srcloc: Option<SourceLoc>,
},
/// Arithmetic shifts: (shl shr sar) (l q) imm reg.
/// Arithmetic shifts: (shl shr sar) (b w l q) imm reg.
Shift_R {
is_64: bool,
size: u8, // 1, 2, 4 or 8
kind: ShiftKind,
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
num_bits: Option<u8>,
@ -512,6 +524,18 @@ impl Inst {
Self::UnaryRmR { size, op, src, dst }
}
pub(crate) fn not(size: u8, src: Writable<Reg>) -> Inst {
debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
Inst::Not { size, src }
}
pub(crate) fn neg(size: u8, src: Writable<Reg>) -> Inst {
debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
Inst::Neg { size, src }
}
pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst {
divisor.assert_regclass_is(RegClass::I64);
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
@ -550,36 +574,23 @@ impl Inst {
}
}
pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2);
Inst::SignExtendRaxRdx { size }
pub(crate) fn sign_extend_data(size: u8) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
Inst::SignExtendData { size }
}
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
if !dst_is_64 {
debug_assert!(
low32_will_sign_extend_to_64(simm64),
"{} won't sign-extend to 64 bits!",
simm64
);
}
Inst::Imm_R {
// Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches
// the semantics of movl).
let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64;
Inst::Imm {
dst_is_64,
simm64,
dst,
}
}
pub(crate) fn imm32_r_unchecked(simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Imm_R {
dst_is_64: false,
simm64,
dst,
}
}
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
debug_assert!(src.get_class() == RegClass::I64);
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
@ -861,19 +872,20 @@ impl Inst {
}
pub(crate) fn shift_r(
is_64: bool,
size: u8,
kind: ShiftKind,
num_bits: Option<u8>,
dst: Writable<Reg>,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(if let Some(num_bits) = num_bits {
num_bits < if is_64 { 64 } else { 32 }
num_bits < size * 8
} else {
true
});
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Shift_R {
is_64,
size,
kind,
num_bits,
dst,
@ -1101,7 +1113,9 @@ impl Inst {
Self::XMM_RM_R { op, src, dst, .. } => {
src.to_reg() == Some(dst.to_reg())
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
&& (*op == SseOpcode::Xorps
|| *op == SseOpcode::Xorpd
|| *op == SseOpcode::Pxor)
}
Self::XmmRmRImm { op, src, dst, imm } => {
@ -1178,6 +1192,18 @@ impl ShowWithRRU for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, *size),
),
Inst::Not { size, src } => format!(
"{} {}",
ljustify2("not".to_string(), suffixBWLQ(*size)),
show_ireg_sized(src.to_reg(), mb_rru, *size)
),
Inst::Neg { size, src } => format!(
"{} {}",
ljustify2("neg".to_string(), suffixBWLQ(*size)),
show_ireg_sized(src.to_reg(), mb_rru, *size)
),
Inst::Div {
size,
signed,
@ -1221,7 +1247,8 @@ impl ShowWithRRU for Inst {
show_ireg_sized(divisor.to_reg(), mb_rru, *size),
),
Inst::SignExtendRaxRdx { size } => match size {
Inst::SignExtendData { size } => match size {
1 => "cbw",
2 => "cwd",
4 => "cdq",
8 => "cqo",
@ -1384,7 +1411,7 @@ impl ShowWithRRU for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
),
Inst::Imm_R {
Inst::Imm {
dst_is_64,
simm64,
dst,
@ -1464,22 +1491,22 @@ impl ShowWithRRU for Inst {
),
Inst::Shift_R {
is_64,
size,
kind,
num_bits,
dst,
} => match num_bits {
None => format!(
"{} %cl, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
ljustify2(kind.to_string(), suffixBWLQ(*size)),
show_ireg_sized(dst.to_reg(), mb_rru, *size)
),
Some(num_bits) => format!(
"{} ${}, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
ljustify2(kind.to_string(), suffixBWLQ(*size)),
num_bits,
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
show_ireg_sized(dst.to_reg(), mb_rru, *size)
),
},
@ -1643,9 +1670,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_mod(*dst);
}
}
Inst::Div { divisor, .. } => {
Inst::Not { src, .. } => {
collector.add_mod(*src);
}
Inst::Neg { src, .. } => {
collector.add_mod(*src);
}
Inst::Div { size, divisor, .. } => {
collector.add_mod(Writable::from_reg(regs::rax()));
collector.add_mod(Writable::from_reg(regs::rdx()));
if *size == 1 {
collector.add_def(Writable::from_reg(regs::rdx()));
} else {
collector.add_mod(Writable::from_reg(regs::rdx()));
}
divisor.get_regs_as_uses(collector);
}
Inst::MulHi { rhs, .. } => {
@ -1664,10 +1701,14 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(*tmp);
}
}
Inst::SignExtendRaxRdx { .. } => {
collector.add_use(regs::rax());
collector.add_def(Writable::from_reg(regs::rdx()));
}
Inst::SignExtendData { size } => match size {
1 => collector.add_mod(Writable::from_reg(regs::rax())),
2 | 4 | 8 => {
collector.add_use(regs::rax());
collector.add_def(Writable::from_reg(regs::rdx()));
}
_ => unreachable!(),
},
Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
src.get_regs_as_uses(collector);
collector.add_def(*dst);
@ -1707,7 +1748,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
src.get_regs_as_uses(collector);
collector.add_use(*dst);
}
Inst::Imm_R { dst, .. } => {
Inst::Imm { dst, .. } => {
collector.add_def(*dst);
}
Inst::Mov_R_R { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => {
@ -1959,6 +2000,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_mod(mapper, dst);
}
}
Inst::Not { src, .. } | Inst::Neg { src, .. } => map_mod(mapper, src),
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
@ -1967,7 +2009,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, tmp)
}
}
Inst::SignExtendRaxRdx { .. } => {}
Inst::SignExtendData { .. } => {}
Inst::XmmUnaryRmR {
ref mut src,
ref mut dst,
@ -2042,7 +2084,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
src.map_uses(mapper);
map_use(mapper, dst);
}
Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
Inst::Imm { ref mut dst, .. } => map_def(mapper, dst),
Inst::Mov_R_R {
ref mut src,
ref mut dst,
@ -2352,7 +2394,57 @@ impl MachInst for Inst {
mut alloc_tmp: F,
) -> SmallVec<[Self; 4]> {
let mut ret = SmallVec::new();
if ty.is_int() {
if ty == types::F32 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I32);
ret.push(Inst::imm(OperandSize::Size32, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
} else if ty == types::F64 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I64);
ret.push(Inst::imm(OperandSize::Size64, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
} else {
// Must be an integer type.
debug_assert!(
ty == types::B1
|| ty == types::I8
|| ty == types::B8
|| ty == types::I16
|| ty == types::B16
|| ty == types::I32
|| ty == types::B32
|| ty == types::I64
|| ty == types::B64
|| ty == types::R32
|| ty == types::R64
);
if value == 0 {
ret.push(Inst::alu_rmi_r(
ty == types::I64,
@ -2361,42 +2453,11 @@ impl MachInst for Inst {
to_reg,
));
} else {
let is_64 = ty == types::I64 && value > 0x7fffffff;
ret.push(Inst::imm_r(is_64, value, to_reg));
}
} else if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
match ty {
types::F32 => {
let tmp = alloc_tmp(RegClass::I64, types::I32);
ret.push(Inst::imm32_r_unchecked(value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
types::F64 => {
let tmp = alloc_tmp(RegClass::I64, types::I64);
ret.push(Inst::imm_r(true, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
_ => panic!("unexpected type {:?} in gen_constant", ty),
ret.push(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
value,
to_reg,
));
}
}
ret
@ -2442,7 +2503,7 @@ impl MachInstEmit for Inst {
}
impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &dyn ABIBody<I = Inst>) -> Self {
fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
EmitState {
virtual_sp_offset: 0,
nominal_sp_to_fp: abi.frame_size() as i64,

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -41,7 +41,7 @@ impl X64Backend {
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::X64ABIBody::new(&func, flags)?);
let abi = Box::new(abi::X64ABICallee::new(&func, flags)?);
compile::compile::<Self>(&func, self, abi)
}
}

Просмотреть файл

@ -116,6 +116,9 @@ mod value_label;
#[cfg(feature = "enable-peepmatic")]
mod peepmatic;
#[cfg(feature = "souper-harvest")]
mod souper_harvest;
pub use crate::result::{CodegenError, CodegenResult};
/// Version number of this crate.

Просмотреть файл

@ -9,7 +9,7 @@ use regalloc::{Reg, Set, SpillSlot, Writable};
/// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody {
pub trait ABICallee {
/// The instruction type for the ISA associated with this ABI.
type I: VCodeInst;
@ -17,7 +17,7 @@ pub trait ABIBody {
/// as the `maybe_tmp` arg if so.
fn temp_needed(&self) -> bool;
/// Initialize. This is called after the ABIBody is constructed because it
/// Initialize. This is called after the ABICallee is constructed because it
/// may be provided with a temp vreg, which can only be allocated once the
/// lowering context exists.
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
@ -155,14 +155,14 @@ pub trait ABIBody {
/// callsite. It will usually be computed from the called function's
/// signature.
///
/// Unlike `ABIBody` above, methods on this trait are not invoked directly
/// Unlike `ABICallee` above, methods on this trait are not invoked directly
/// by the machine-independent code. Rather, the machine-specific lowering
/// code will typically create an `ABICall` when creating machine instructions
/// code will typically create an `ABICaller` when creating machine instructions
/// for an IR call instruction inside `lower()`, directly emit the arg and
/// and retval copies, and attach the register use/def info to the call.
///
/// This trait is thus provided for convenience to the backends.
pub trait ABICall {
pub trait ABICaller {
/// The instruction type for the ISA associated with this ABI.
type I: VCodeInst;
@ -203,6 +203,6 @@ pub trait ABICall {
/// sense.)
///
/// This function should only be called once, as it is allowed to re-use
/// parts of the ABICall object in emitting instructions.
/// parts of the ABICaller object in emitting instructions.
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
}

Просмотреть файл

@ -119,6 +119,7 @@ use crate::{ir, isa};
use alloc::vec::Vec;
use log::{debug, trace};
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use std::convert::TryFrom;
use std::marker::PhantomData;
use std::mem;
@ -142,6 +143,16 @@ pub enum ArgsOrRets {
Rets,
}
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}
/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
@ -160,7 +171,7 @@ pub enum StackAMode {
/// Trait implemented by machine-specific backend to provide information about
/// register assignments and to allow generating the specific instructions for
/// stack loads/saves, prologues/epilogues, etc.
pub trait ABIMachineImpl {
pub trait ABIMachineSpec {
/// The instruction type.
type I: VCodeInst;
@ -207,13 +218,15 @@ pub trait ABIMachineImpl {
fn gen_epilogue_placeholder() -> Self::I;
/// Generate an add-with-immediate. Note that even if this uses a scratch
/// register, the sequence must still be correct if the given source or dest
/// is the register returned by `get_fixed_tmp_reg()`; hence, for machines
/// that may need a scratch register to synthesize an arbitrary constant,
/// the machine backend should reserve *another* fixed temp register for
/// this purpose. (E.g., on AArch64, x16 is the ordinary fixed tmp, and x17
/// is the secondary fixed tmp used to implement this.)
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]>;
/// register, it must satisfy two requirements:
///
/// - The add-imm sequence must only clobber caller-save registers, because
/// it will be placed in the prologue before the clobbered callee-save
/// registers are saved.
///
/// - The add-imm sequence must work correctly when `from_reg` and/or
/// `into_reg` are the register returned by `get_stacklimit_reg()`.
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]>;
/// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
/// the stack pointer is less than the given limit register (assuming the
@ -224,21 +237,30 @@ pub trait ABIMachineImpl {
/// SP-based offset).
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
/// Get a fixed (not used by regalloc) temp. This is needed for certain
/// sequences generated after the register allocator has already run.
fn get_fixed_tmp_reg() -> Reg;
/// Get a fixed register to use to compute a stack limit. This is needed for
/// certain sequences generated after the register allocator has already
/// run. This must satisfy two requirements:
///
/// - It must be a caller-save register, because it will be clobbered in the
/// prologue before the clobbered callee-save registers are saved.
///
/// - It must be safe to pass as an argument and/or destination to
/// `gen_add_imm()`. This is relevant when an addition with a large
/// immediate needs its own temporary; it cannot use the same fixed
/// temporary as this one.
fn get_stacklimit_reg() -> Reg;
/// Generate a store to the given [base+offset] address.
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i64, ty: Type) -> Self::I;
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;
/// Generate a load from the given [base+offset] address.
fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I;
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;
/// Adjust the stack pointer up or down.
fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]>;
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]>;
/// Generate a meta-instruction that adjusts the nominal SP offset.
fn gen_nominal_sp_adj(amount: i64) -> Self::I;
fn gen_nominal_sp_adj(amount: i32) -> Self::I;
/// Generate the usual frame-setup sequence for this architecture: e.g.,
/// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
@ -272,18 +294,20 @@ pub trait ABIMachineImpl {
clobbers: &Set<Writable<RealReg>>,
) -> SmallVec<[Self::I; 16]>;
/// Generate a call instruction/sequence.
/// Generate a call instruction/sequence. This method is provided one
/// temporary register to use to synthesize the called address, if needed.
fn gen_call(
dest: &CallDest,
uses: Vec<Reg>,
defs: Vec<Writable<Reg>>,
loc: SourceLoc,
opcode: ir::Opcode,
) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>;
tmp: Writable<Reg>,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
/// Get the number of spillslots required for the given register-class and
/// type.
fn get_spillslot_size(rc: RegClass, ty: Type) -> u32;
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
/// Get the current virtual-SP offset from an instruction-emission state.
fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
@ -314,7 +338,7 @@ struct ABISig {
}
impl ABISig {
fn from_func_sig<M: ABIMachineImpl>(sig: &ir::Signature) -> CodegenResult<ABISig> {
fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
// Compute args and retvals from signature. Handle retvals first,
// because we may need to add a return-area arg to the args.
let (rets, stack_ret_space, _) = M::compute_arg_locs(
@ -353,7 +377,7 @@ impl ABISig {
}
/// ABI object for a function body.
pub struct ABIBodyImpl<M: ABIMachineImpl> {
pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Signature: arg and retval regs.
sig: ABISig,
/// Offsets to each stackslot.
@ -405,7 +429,7 @@ fn get_special_purpose_param_register(
}
}
impl<M: ABIMachineImpl> ABIBodyImpl<M> {
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
debug!("ABI: func signature {:?}", f.signature);
@ -506,8 +530,7 @@ impl<M: ABIMachineImpl> ABIBodyImpl<M> {
// `scratch`. If our stack size doesn't fit into an immediate this
// means we need a second scratch register for loading the stack size
// into a register.
let scratch = Writable::from_reg(M::get_fixed_tmp_reg());
let stack_size = u64::from(stack_size);
let scratch = Writable::from_reg(M::get_stacklimit_reg());
insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter());
insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
}
@ -532,7 +555,7 @@ impl<M: ABIMachineImpl> ABIBodyImpl<M> {
/// temporary register to store values in if necessary. Currently after we write
/// to this register there's guaranteed to be no spilled values between where
/// it's used, because we're not participating in register allocation anyway!
fn gen_stack_limit<M: ABIMachineImpl>(
fn gen_stack_limit<M: ABIMachineSpec>(
f: &ir::Function,
abi: &ABISig,
gv: ir::GlobalValue,
@ -542,7 +565,7 @@ fn gen_stack_limit<M: ABIMachineImpl>(
return (reg, insts);
}
fn generate_gv<M: ABIMachineImpl>(
fn generate_gv<M: ABIMachineSpec>(
f: &ir::Function,
abi: &ABISig,
gv: ir::GlobalValue,
@ -563,7 +586,7 @@ fn generate_gv<M: ABIMachineImpl>(
readonly: _,
} => {
let base = generate_gv::<M>(f, abi, base, insts);
let into_reg = Writable::from_reg(M::get_fixed_tmp_reg());
let into_reg = Writable::from_reg(M::get_stacklimit_reg());
insts.push(M::gen_load_base_offset(into_reg, base, offset.into(), I64));
return into_reg.to_reg();
}
@ -591,7 +614,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
}
}
impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
type I = M::I;
fn temp_needed(&self) -> bool {
@ -676,6 +699,11 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
}
&ABIArg::Stack(off, mut ty, ext) => {
let from_bits = ty_bits(ty) as u8;
// A machine ABI implementation should ensure that stack frames
// have "reasonable" size. All current ABIs for machinst
// backends (aarch64 and x64) enforce a 128MB limit.
let off = i32::try_from(off)
.expect("Argument stack offset greater than 2GB; should hit impl limit first");
// Trash the from_reg; it should be its last use.
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < 64 => {
@ -864,7 +892,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
if total_sp_adjust > 0 {
// sub sp, sp, #total_stacksize
let adj = total_sp_adjust as i64;
let adj = total_sp_adjust as i32;
insts.extend(M::gen_sp_reg_adjust(-adj));
}
@ -873,7 +901,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
insts.extend(clobber_insts);
if clobber_size > 0 {
insts.push(M::gen_nominal_sp_adj(clobber_size as i64));
insts.push(M::gen_nominal_sp_adj(clobber_size as i32));
}
self.total_frame_size = Some(total_stacksize);
@ -911,7 +939,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
M::get_spillslot_size(rc, ty)
M::get_number_of_spillslots_for_value(rc, ty)
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
@ -930,7 +958,7 @@ impl<M: ABIMachineImpl> ABIBody for ABIBodyImpl<M> {
}
}
fn abisig_to_uses_and_defs<M: ABIMachineImpl>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Vec::new();
for arg in &sig.args {
@ -953,7 +981,7 @@ fn abisig_to_uses_and_defs<M: ABIMachineImpl>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
/// ABI object for a callsite.
pub struct ABICallImpl<M: ABIMachineImpl> {
pub struct ABICallerImpl<M: ABIMachineSpec> {
/// The called function's signature.
sig: ABISig,
/// All uses for the callsite, i.e., function args.
@ -979,17 +1007,17 @@ pub enum CallDest {
Reg(Reg),
}
impl<M: ABIMachineImpl> ABICallImpl<M> {
impl<M: ABIMachineSpec> ABICallerImpl<M> {
/// Create a callsite ABI object for a call directly to the specified function.
pub fn from_func(
sig: &ir::Signature,
extname: &ir::ExternalName,
dist: RelocDistance,
loc: ir::SourceLoc,
) -> CodegenResult<ABICallImpl<M>> {
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallImpl {
Ok(ABICallerImpl {
sig,
uses,
defs,
@ -1007,10 +1035,10 @@ impl<M: ABIMachineImpl> ABICallImpl<M> {
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
) -> CodegenResult<ABICallImpl<M>> {
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
Ok(ABICallImpl {
Ok(ABICallerImpl {
sig,
uses,
defs,
@ -1022,15 +1050,14 @@ impl<M: ABIMachineImpl> ABICallImpl<M> {
}
}
fn adjust_stack_and_nominal_sp<M: ABIMachineImpl, C: LowerCtx<I = M::I>>(
fn adjust_stack_and_nominal_sp<M: ABIMachineSpec, C: LowerCtx<I = M::I>>(
ctx: &mut C,
off: u64,
off: i32,
is_sub: bool,
) {
if off == 0 {
return;
}
let off = off as i64;
let amt = if is_sub { -off } else { off };
for inst in M::gen_sp_reg_adjust(amt) {
ctx.emit(inst);
@ -1038,7 +1065,7 @@ fn adjust_stack_and_nominal_sp<M: ABIMachineImpl, C: LowerCtx<I = M::I>>(
ctx.emit(M::gen_nominal_sp_adj(-amt));
}
impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
type I = M::I;
fn num_args(&self) -> usize {
@ -1051,12 +1078,12 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ true)
adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)
}
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
adjust_stack_and_nominal_sp::<M, C>(ctx, off as u64, /* is_sub = */ false)
adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ false)
}
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
@ -1152,13 +1179,13 @@ impl<M: ABIMachineImpl> ABICall for ABICallImpl<M> {
));
self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
}
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
for (is_safepoint, inst) in
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter()
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
{
if is_safepoint {
ctx.emit_safepoint(inst);
} else {
ctx.emit(inst);
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
InstIsSafepoint::No => ctx.emit(inst),
}
}
}

Просмотреть файл

@ -13,7 +13,7 @@ use regalloc::{allocate_registers_with_opts, Algorithm, Options};
pub fn compile<B: LowerBackend + MachBackend>(
f: &Function,
b: &B,
abi: Box<dyn ABIBody<I = B::MInst>>,
abi: Box<dyn ABICallee<I = B::MInst>>,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: ShowWithRRU,

Просмотреть файл

@ -1,6 +1,8 @@
//! Miscellaneous helpers for machine backends.
use super::{InsnOutput, LowerCtx, VCodeInst};
use crate::ir::Type;
use regalloc::{Reg, Writable};
/// Returns the size (in bits) of a given type.
pub fn ty_bits(ty: Type) -> usize {
@ -16,3 +18,11 @@ pub(crate) fn ty_has_int_representation(ty: Type) -> bool {
pub(crate) fn ty_has_float_or_vec_representation(ty: Type) -> bool {
ty.is_vector() || ty.is_float()
}
/// Allocate a register for an instruction output and return it.
pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
ctx: &mut C,
spec: InsnOutput,
) -> Writable<Reg> {
ctx.get_output(spec.insn, spec.output)
}

Просмотреть файл

@ -1,6 +1,29 @@
//! A place to park MachInst::Inst fragments which are common across multiple architectures.
use crate::ir;
use crate::ir::{self, Inst as IRInst};
//============================================================================
// Instruction input "slots".
//
// We use these types to refer to operand numbers, and result numbers, together
// with the associated instruction, in a type-safe way.
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
pub(crate) insn: IRInst,
pub(crate) input: usize,
}
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
//============================================================================
// Atomic instructions.
/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64
/// targets.

Просмотреть файл

@ -13,7 +13,7 @@ use crate::ir::{
ValueDef,
};
use crate::machinst::{
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
VCodeInst,
};
use crate::CodegenResult;
@ -61,8 +61,8 @@ pub trait LowerCtx {
// Function-level queries:
/// Get the `ABIBody`.
fn abi(&mut self) -> &dyn ABIBody<I = Self::I>;
/// Get the `ABICallee`.
fn abi(&mut self) -> &dyn ABICallee<I = Self::I>;
/// Get the (virtual) register that receives the return value. A return
/// instruction should lower into a sequence that fills this register. (Why
/// not allow the backend to specify its own result register for the return?
@ -312,7 +312,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
/// Prepare a new lowering context for the given IR function.
pub fn new(
f: &'func Function,
abi: Box<dyn ABIBody<I = I>>,
abi: Box<dyn ABICallee<I = I>>,
block_order: BlockLoweringOrder,
) -> CodegenResult<Lower<'func, I>> {
let mut vcode = VCodeBuilder::new(abi, block_order);
@ -844,7 +844,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
type I = I;
fn abi(&mut self) -> &dyn ABIBody<I = I> {
fn abi(&mut self) -> &dyn ABICallee<I = I> {
self.vcode.abi()
}

Просмотреть файл

@ -282,7 +282,7 @@ pub trait MachInstEmit: MachInst {
/// emitting a function body.
pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
/// Create a new emission state given the ABI object.
fn new(abi: &dyn ABIBody<I = I>) -> Self;
fn new(abi: &dyn ABICallee<I = I>) -> Self;
/// Update the emission state before emitting an instruction that is a
/// safepoint.
fn pre_safepoint(&mut self, _stack_map: StackMap) {}

Просмотреть файл

@ -86,7 +86,7 @@ pub struct VCode<I: VCodeInst> {
block_order: BlockLoweringOrder,
/// ABI object.
abi: Box<dyn ABIBody<I = I>>,
abi: Box<dyn ABICallee<I = I>>,
/// Safepoint instruction indices. Filled in post-regalloc. (Prior to
/// regalloc, the safepoint instructions are listed in the separate
@ -132,7 +132,7 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
pub fn new(abi: Box<dyn ABICallee<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
let reftype_class = I::ref_type_regclass(abi.flags());
let vcode = VCode::new(abi, block_order);
let stack_map_info = StackmapRequestInfo {
@ -151,7 +151,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
}
/// Access the ABI object.
pub fn abi(&mut self) -> &mut dyn ABIBody<I = I> {
pub fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
&mut *self.vcode.abi
}
@ -263,7 +263,7 @@ fn is_reftype(ty: Type) -> bool {
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
fn new(abi: Box<dyn ABICallee<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
VCode {
liveins: abi.liveins(),
liveouts: abi.liveouts(),

527
third_party/rust/cranelift-codegen/src/souper_harvest.rs поставляемый Normal file
Просмотреть файл

@ -0,0 +1,527 @@
//! Harvest left-hand side superoptimization candidates.
//!
//! Given a clif function, harvest all its integer subexpressions, so that they
//! can be fed into [Souper](https://github.com/google/souper) as candidates for
//! superoptimization. For some of these candidates, Souper will successfully
//! synthesize a right-hand side that is equivalent but has lower cost than the
//! left-hand side. Then, we can combine these left- and right-hand sides into a
//! complete optimization, and add it to our peephole passes.
//!
//! To harvest the expression that produced a given value `x`, we do a
//! post-order traversal of the dataflow graph starting from `x`. As we do this
//! traversal, we maintain a map from clif values to their translated Souper
//! values. We stop traversing when we reach anything that can't be translated
//! into Souper IR: a memory load, a float-to-int conversion, a block parameter,
//! etc. For values produced by these instructions, we create a Souper `var`,
//! which is an input variable to the optimization. For instructions that have a
//! direct mapping into Souper IR, we get the Souper version of each of its
//! operands and then create the Souper version of the instruction itself. It
//! should now be clear why we do a post-order traversal: we need an
//! instruction's translated operands in order to translate the instruction
//! itself. Once this instruction is translated, we update the clif-to-souper
//! map with this new translation so that any other instruction that uses this
//! result as an operand has access to the translated value. When the traversal
//! is complete we return the translation of `x` as the root of left-hand side
//! candidate.
use crate::ir;
use souper_ir::ast;
use std::collections::{HashMap, HashSet};
use std::string::String;
use std::sync::mpsc;
use std::vec::Vec;
/// Harvest Souper left-hand side candidates from the given function.
///
/// Candidates are reported through the given MPSC sender.
pub fn do_souper_harvest(func: &ir::Function, out: &mut mpsc::Sender<String>) {
    // Scratch collections, reused across every harvest below so we don't
    // re-allocate per candidate.
    let mut allocs = Allocs::default();

    // Walk every instruction of every block in layout order and try to
    // harvest a left-hand side from its result.
    for block in func.layout.blocks() {
        let mut cursor = func.layout.first_inst(block);
        while let Some(inst) = cursor {
            cursor = func.layout.next_inst(inst);

            // Only single-result instructions that produce a scalar integer
            // are candidate roots.
            if let &[val] = func.dfg.inst_results(inst) {
                let ty = func.dfg.value_type(val);
                if ty.is_int() && ty.lane_count() == 1 {
                    harvest_candidate_lhs(&mut allocs, func, val, out);
                }
            }
        }
    }
}
/// Allocations that we reuse across many LHS candidate harvests.
///
/// One value per function is harvested at a time; clearing (rather than
/// dropping) these collections between harvests keeps their backing storage.
#[derive(Default)]
struct Allocs {
    /// A map from cranelift IR to souper IR for values that we've already
    /// translated into souper IR.
    ir_to_souper_val: HashMap<ir::Value, ast::ValueId>,
    /// Stack of to-visit and to-trace values for the post-order DFS.
    dfs_stack: Vec<StackEntry>,
    /// Set of values we've already seen in our post-order DFS.
    dfs_seen: HashSet<ir::Value>,
}
impl Allocs {
    /// Reset the collections to their empty state (without deallocating their
    /// backing data).
    fn reset(&mut self) {
        self.ir_to_souper_val.clear();
        self.dfs_stack.clear();
        self.dfs_seen.clear();
    }
}
/// Harvest a candidate LHS for `val` from the dataflow graph.
fn harvest_candidate_lhs(
allocs: &mut Allocs,
func: &ir::Function,
val: ir::Value,
out: &mut mpsc::Sender<String>,
) {
allocs.reset();
let mut lhs = ast::LeftHandSideBuilder::default();
let mut non_var_count = 0;
// Should we keep tracing through the given `val`? Only if it is defined
// by an instruction that we can translate to Souper IR.
let should_trace = |val| match func.dfg.value_def(val) {
ir::ValueDef::Result(inst, 0) => match func.dfg[inst].opcode() {
ir::Opcode::Iadd
| ir::Opcode::IaddImm
| ir::Opcode::IrsubImm
| ir::Opcode::Imul
| ir::Opcode::ImulImm
| ir::Opcode::Udiv
| ir::Opcode::UdivImm
| ir::Opcode::Sdiv
| ir::Opcode::SdivImm
| ir::Opcode::Urem
| ir::Opcode::UremImm
| ir::Opcode::Srem
| ir::Opcode::SremImm
| ir::Opcode::Band
| ir::Opcode::BandImm
| ir::Opcode::Bor
| ir::Opcode::BorImm
| ir::Opcode::Bxor
| ir::Opcode::BxorImm
| ir::Opcode::Ishl
| ir::Opcode::IshlImm
| ir::Opcode::Sshr
| ir::Opcode::SshrImm
| ir::Opcode::Ushr
| ir::Opcode::UshrImm
| ir::Opcode::Select
| ir::Opcode::Uextend
| ir::Opcode::Sextend
| ir::Opcode::Trunc
| ir::Opcode::Icmp
| ir::Opcode::Popcnt
| ir::Opcode::Bitrev
| ir::Opcode::Clz
| ir::Opcode::Ctz
// TODO: ir::Opcode::IaddCarry
// TODO: ir::Opcode::IaddCout
| ir::Opcode::SaddSat
| ir::Opcode::SsubSat
| ir::Opcode::UsubSat => true,
_ => false,
},
_ => false,
};
post_order_dfs(allocs, &func.dfg, val, should_trace, |allocs, val| {
let souper_assignment_rhs = match func.dfg.value_def(val) {
ir::ValueDef::Result(inst, 0) => {
let args = func.dfg.inst_args(inst);
let arg = |allocs: &mut Allocs, n| allocs.ir_to_souper_val[&args[n]].into();
match (func.dfg[inst].opcode(), &func.dfg[inst]) {
(ir::Opcode::Iadd, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Add { a, b }.into()
}
(ir::Opcode::IaddImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Add { a, b }.into()
}
(ir::Opcode::IrsubImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let b = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let a = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Sub { a, b }.into()
}
(ir::Opcode::Imul, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Mul { a, b }.into()
}
(ir::Opcode::ImulImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Mul { a, b }.into()
}
(ir::Opcode::Udiv, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Udiv { a, b }.into()
}
(ir::Opcode::UdivImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Udiv { a, b }.into()
}
(ir::Opcode::Sdiv, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Sdiv { a, b }.into()
}
(ir::Opcode::SdivImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Sdiv { a, b }.into()
}
(ir::Opcode::Urem, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Urem { a, b }.into()
}
(ir::Opcode::UremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Urem { a, b }.into()
}
(ir::Opcode::Srem, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Srem { a, b }.into()
}
(ir::Opcode::SremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Srem { a, b }.into()
}
(ir::Opcode::Band, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::And { a, b }.into()
}
(ir::Opcode::BandImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::And { a, b }.into()
}
(ir::Opcode::Bor, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Or { a, b }.into()
}
(ir::Opcode::BorImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Or { a, b }.into()
}
(ir::Opcode::Bxor, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Xor { a, b }.into()
}
(ir::Opcode::BxorImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Xor { a, b }.into()
}
(ir::Opcode::Ishl, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Shl { a, b }.into()
}
(ir::Opcode::IshlImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Shl { a, b }.into()
}
(ir::Opcode::Sshr, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Ashr { a, b }.into()
}
(ir::Opcode::SshrImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Ashr { a, b }.into()
}
(ir::Opcode::Ushr, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::Lshr { a, b }.into()
}
(ir::Opcode::UshrImm, ir::InstructionData::BinaryImm64 { imm, .. }) => {
let a = arg(allocs, 0);
let value: i64 = (*imm).into();
let value: i128 = value.into();
let b = ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into();
ast::Instruction::Lshr { a, b }.into()
}
(ir::Opcode::Select, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
let c = arg(allocs, 2);
ast::Instruction::Select { a, b, c }.into()
}
(ir::Opcode::Uextend, _) => {
let a = arg(allocs, 0);
ast::Instruction::Zext { a }.into()
}
(ir::Opcode::Sextend, _) => {
let a = arg(allocs, 0);
ast::Instruction::Sext { a }.into()
}
(ir::Opcode::Trunc, _) => {
let a = arg(allocs, 0);
ast::Instruction::Trunc { a }.into()
}
(ir::Opcode::Icmp, ir::InstructionData::IntCompare { cond, .. })
| (ir::Opcode::IcmpImm, ir::InstructionData::IntCompare { cond, .. }) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
match cond {
ir::condcodes::IntCC::Equal => ast::Instruction::Eq { a, b }.into(),
ir::condcodes::IntCC::NotEqual => ast::Instruction::Ne { a, b }.into(),
ir::condcodes::IntCC::UnsignedLessThan => {
ast::Instruction::Ult { a, b }.into()
}
ir::condcodes::IntCC::SignedLessThan => {
ast::Instruction::Slt { a, b }.into()
}
ir::condcodes::IntCC::UnsignedLessThanOrEqual => {
ast::Instruction::Sle { a, b }.into()
}
ir::condcodes::IntCC::SignedLessThanOrEqual => {
ast::Instruction::Sle { a, b }.into()
}
_ => ast::AssignmentRhs::Var,
}
}
(ir::Opcode::Popcnt, _) => {
let a = arg(allocs, 0);
ast::Instruction::Ctpop { a }.into()
}
(ir::Opcode::Bitrev, _) => {
let a = arg(allocs, 0);
ast::Instruction::BitReverse { a }.into()
}
(ir::Opcode::Clz, _) => {
let a = arg(allocs, 0);
ast::Instruction::Ctlz { a }.into()
}
(ir::Opcode::Ctz, _) => {
let a = arg(allocs, 0);
ast::Instruction::Cttz { a }.into()
}
// TODO: ir::Opcode::IaddCarry
// TODO: ir::Opcode::IaddCout
(ir::Opcode::SaddSat, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::SaddSat { a, b }.into()
}
(ir::Opcode::SsubSat, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::SsubSat { a, b }.into()
}
(ir::Opcode::UsubSat, _) => {
let a = arg(allocs, 0);
let b = arg(allocs, 1);
ast::Instruction::UsubSat { a, b }.into()
}
(ir::Opcode::Iconst, ir::InstructionData::UnaryImm { imm, .. }) => {
let value: i64 = (*imm).into();
let value: i128 = value.into();
ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into()
}
(ir::Opcode::Bconst, ir::InstructionData::UnaryBool { imm, .. }) => {
let value = *imm as i128;
ast::Constant {
value,
r#type: souper_type_of(&func.dfg, val),
}
.into()
}
_ => ast::AssignmentRhs::Var,
}
}
_ => ast::AssignmentRhs::Var,
};
non_var_count += match souper_assignment_rhs {
ast::AssignmentRhs::Var => 0,
_ => 1,
};
let souper_ty = souper_type_of(&func.dfg, val);
let souper_val = lhs.assignment(None, souper_ty, souper_assignment_rhs, vec![]);
let old_value = allocs.ir_to_souper_val.insert(val, souper_val);
assert!(old_value.is_none());
});
// We end up harvesting a lot of candidates like:
//
// %0:i32 = var
// infer %0
//
// and
//
// %0:i32 = var
// %1:i32 = var
// %2:i32 = add %0, %1
//
// Both of these are useless. Only actually harvest the candidate if there
// are at least two actual operations.
if non_var_count >= 2 {
let lhs = lhs.finish(allocs.ir_to_souper_val[&val], None);
out.send(format!(
";; Harvested from `{}` in `{}`\n{}\n",
val, func.name, lhs
))
.unwrap();
}
}
/// Translate the clif type of `val` into the corresponding Souper type.
///
/// Only scalar integers and booleans are representable; anything else is a
/// caller bug, hence the asserts.
fn souper_type_of(dfg: &ir::DataFlowGraph, val: ir::Value) -> Option<ast::Type> {
    let clif_ty = dfg.value_type(val);
    assert!(clif_ty.is_int() || clif_ty.is_bool());
    assert_eq!(clif_ty.lane_count(), 1);
    let width = clif_ty.bits();
    Some(ast::Type { width })
}
/// An entry on the explicit work stack used by `post_order_dfs`.
#[derive(Debug)]
enum StackEntry {
    // Emit this value to the visitor; pushed beneath its operands' `Trace`
    // entries so it is only visited after they have been processed.
    Visit(ir::Value),
    // Trace through this value's defining instruction's operands first.
    Trace(ir::Value),
}
/// Drive `visit` over the dataflow graph rooted at `val` in post-order.
///
/// Uses an explicit stack (borrowed from `allocs`) instead of recursion, so
/// deep expression trees cannot overflow the native stack. Operands are
/// pushed in reverse so they pop off in their original order, and `dfs_seen`
/// guarantees each value is visited at most once. Tracing only continues
/// through values for which `should_trace` returns true.
fn post_order_dfs(
    allocs: &mut Allocs,
    dfg: &ir::DataFlowGraph,
    val: ir::Value,
    should_trace: impl Fn(ir::Value) -> bool,
    mut visit: impl FnMut(&mut Allocs, ir::Value),
) {
    allocs.dfs_stack.push(StackEntry::Trace(val));

    while let Some(entry) = allocs.dfs_stack.pop() {
        match entry {
            StackEntry::Visit(v) => {
                // `insert` returns false for values we've already emitted.
                if allocs.dfs_seen.insert(v) {
                    visit(allocs, v);
                }
            }
            // Already scheduled or emitted: nothing to do.
            StackEntry::Trace(v) if allocs.dfs_seen.contains(&v) => {}
            StackEntry::Trace(v) => {
                // Schedule the visit of `v` itself, then stack its operands on
                // top so they are processed (and translated) before `v`.
                allocs.dfs_stack.push(StackEntry::Visit(v));
                if should_trace(v) {
                    if let ir::ValueDef::Result(inst, 0) = dfg.value_def(v) {
                        let args = dfg.inst_args(inst);
                        allocs
                            .dfs_stack
                            .extend(args.iter().rev().map(|&a| StackEntry::Trace(a)));
                    }
                }
            }
        }
    }
}

Просмотреть файл

@ -1 +1 @@
{"files":{"Cargo.toml":"52587586762dcb18c8ae39de76ef388a78b857d8fecd87b77b6a30dc8f85e1f5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"114e1ff1e5eacaf3c79946fcf441a8f525148a50e94a3f81373d4b745ac09a9f","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"52587586762dcb18c8ae39de76ef388a78b857d8fecd87b77b6a30dc8f85e1f5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"2d394292c4eafee3476eb159bbb52e1da2108b0ba8fc4cf8dd663fca1ee887d9","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}

Просмотреть файл

@ -240,8 +240,8 @@ impl Switch {
// There are currently no 128bit systems supported by rustc, but once we do ensure that
// we don't silently ignore a part of the jump table for 128bit integers on 128bit systems.
assert!(
u64::try_from(blocks.len()).is_ok(),
"Jump tables bigger than 2^64-1 are not yet supported"
u32::try_from(blocks.len()).is_ok(),
"Jump tables bigger than 2^32-1 are not yet supported"
);
let mut jt_data = JumpTableData::new();
@ -265,18 +265,18 @@ impl Switch {
}
};
let discr = if bx.func.dfg.value_type(discr).bits() > 64 {
// Check for overflow of cast to u64.
let discr = if bx.func.dfg.value_type(discr).bits() > 32 {
// Check for overflow of cast to u32.
let new_block = bx.create_block();
let bigger_than_u64 =
let bigger_than_u32 =
bx.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, discr, u64::max_value() as i64);
bx.ins().brnz(bigger_than_u64, otherwise, &[]);
.icmp_imm(IntCC::UnsignedGreaterThan, discr, u32::max_value() as i64);
bx.ins().brnz(bigger_than_u32, otherwise, &[]);
bx.ins().jump(new_block, &[]);
bx.switch_to_block(new_block);
// Cast to u64, as br_table is not implemented for integers bigger than 64bits.
bx.ins().ireduce(types::I64, discr)
// Cast to u32, as br_table is not implemented for integers bigger than 32bits.
bx.ins().ireduce(types::I32, discr)
} else {
discr
};
@ -571,6 +571,47 @@ block4:
builder.finalize(); // Will panic if some blocks are not sealed
}
#[test]
// Regression test for `Switch` lowering of a 64-bit discriminant: values
// above u32::MAX must branch straight to the fallback block, and the
// discriminant is reduced to i32 before `br_table` (which only handles
// 32-bit indices).
fn switch_64bit() {
let mut func = Function::new();
let mut func_ctx = FunctionBuilderContext::new();
{
let mut bx = FunctionBuilder::new(&mut func, &mut func_ctx);
let block0 = bx.create_block();
bx.switch_to_block(block0);
// 64-bit discriminant, forcing the u32-overflow guard path.
let val = bx.ins().iconst(types::I64, 0);
let mut switch = Switch::new();
let block1 = bx.create_block();
switch.set_entry(1, block1);
let block2 = bx.create_block();
switch.set_entry(0, block2);
let block3 = bx.create_block();
switch.emit(&mut bx, val, block3);
}
// Strip the function header/footer so the assertion only covers the body.
let func = func
.to_string()
.trim_start_matches("function u0:0() fast {\n")
.trim_end_matches("\n}\n")
.to_string();
assert_eq!(
func,
" jt0 = jump_table [block2, block1]
block0:
v0 = iconst.i64 0
jump block4
block4:
v1 = icmp_imm.i64 ugt v0, 0xffff_ffff
brnz v1, block3
jump block5
block5:
v2 = ireduce.i32 v0
br_table v2, block3, jt0"
);
}
#[test]
fn switch_128bit() {
let mut func = Function::new();
@ -602,12 +643,12 @@ block0:
jump block4
block4:
v1 = icmp_imm.i128 ugt v0, -1
v1 = icmp_imm.i128 ugt v0, 0xffff_ffff
brnz v1, block3
jump block5
block5:
v2 = ireduce.i64 v0
v2 = ireduce.i32 v0
br_table v2, block3, jt0"
);
}

Просмотреть файл

@ -1 +1 @@
{"files":{"Cargo.toml":"3c6ff4a157eea9e347e0d23f4f23628f77233a3e897e48a32b4a4a7679b7548a","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"cb1b08bbfc69dbb2e71cec29c5c6ab00ed14df9b09d78dec05c65a80dd1007ca","src/environ/dummy.rs":"0c05a77ab37a305c799f1b0e99c9debe1b8f59a3e3aa764e2fe39a923716b2ee","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"0f97fff3cc545772a1959f7d0439713fd7dc8d8adf43a2636f8174126dc1393c","src/func_translator.rs":"48ee25da11063743459f9e9407512413075265e67713c6f5ab733798be2bf19d","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"1374fa56ca18a782083fa0f25f2ad675044a92bbf1a0a1cc44fcaf695807e044","src/sections_translator.rs":"11d65fd2e595e41f976e5c7d0df823f70449f79a9d2facbed61263616f8cfec1","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"7ca3cb06b4481bc3ae74697fbcd437aea1d851eaa3cfe18cc013a4af43728957","src/translation_utils.rs":"69f20c47ea22f0badd21a6187b5f9764252a00d19643a7e3e691797a9fe34f1b","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
{"files":{"Cargo.toml":"3c6ff4a157eea9e347e0d23f4f23628f77233a3e897e48a32b4a4a7679b7548a","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"d85885a51f19f25a1c60c94f6f35815273547a18b04d73584d7a8d283c099651","src/environ/dummy.rs":"0c05a77ab37a305c799f1b0e99c9debe1b8f59a3e3aa764e2fe39a923716b2ee","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"0f97fff3cc545772a1959f7d0439713fd7dc8d8adf43a2636f8174126dc1393c","src/func_translator.rs":"48ee25da11063743459f9e9407512413075265e67713c6f5ab733798be2bf19d","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"1374fa56ca18a782083fa0f25f2ad675044a92bbf1a0a1cc44fcaf695807e044","src/sections_translator.rs":"11d65fd2e595e41f976e5c7d0df823f70449f79a9d2facbed61263616f8cfec1","src/state/func_state.rs":"581a5648b11fa07aef3cff0752597864c7cd44a4d44e27c50fc7349955b3fda3","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"7ca3cb06b4481bc3ae74697fbcd437aea1d851eaa3cfe18cc013a4af43728957","src/translation_utils.rs":"69f20c47ea22f0badd21a6187b5f9764252a00d19643a7e3e691797a9fe34f1b","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}

Просмотреть файл

@ -323,13 +323,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// since we truncate the stack back to the original height
// below.
}
builder.switch_to_block(next_block);
builder.seal_block(next_block);
// If it is a loop we also have to seal the body loop block
if let ControlStackFrame::Loop { header, .. } = frame {
builder.seal_block(header)
}
state.stack.truncate(frame.original_stack_size());
frame.truncate_value_stack_to_original_size(&mut state.stack);
state
.stack
.extend_from_slice(builder.block_params(next_block));
@ -1888,9 +1891,8 @@ fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
let (params, _results) =
blocktype_params_results(module_translation_state, blocktype)?;
let else_block = block_with_params(builder, params, environ)?;
state.stack.truncate(
state.control_stack.last().unwrap().original_stack_size(),
);
let frame = state.control_stack.last().unwrap();
frame.truncate_value_stack_to_else_params(&mut state.stack);
// We change the target of the branch instruction.
builder.change_jump_destination(branch_inst, else_block);
@ -1898,9 +1900,8 @@ fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
else_block
}
ElseData::WithElse { else_block } => {
state.stack.truncate(
state.control_stack.last().unwrap().original_stack_size(),
);
let frame = state.control_stack.last().unwrap();
frame.truncate_value_stack_to_else_params(&mut state.stack);
else_block
}
};
@ -1921,9 +1922,8 @@ fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
let control_stack = &mut state.control_stack;
let frame = control_stack.pop().unwrap();
// Now we have to split off the stack the values not used
// by unreachable code that hasn't been translated
stack.truncate(frame.original_stack_size());
// Pop unused parameters from stack.
frame.truncate_value_stack_to_original_size(stack);
let reachable_anyway = match frame {
// If it is a loop we also have to seal the body loop block

Просмотреть файл

@ -128,7 +128,9 @@ impl ControlStackFrame {
Self::Loop { header, .. } => header,
}
}
pub fn original_stack_size(&self) -> usize {
/// Private helper. Use `truncate_value_stack_to_else_params()` or
/// `truncate_value_stack_to_original_size()` to restore value-stack state.
fn original_stack_size(&self) -> usize {
match *self {
Self::If {
original_stack_size,
@ -178,6 +180,33 @@ impl ControlStackFrame {
Self::Loop { .. } => {}
}
}
/// Pop values from the value stack so that it is left at the
/// input-parameters to an else-block.
pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
// Only `If` frames can have an else-block. `original_stack_size` still
// includes one copy of the block parameters, which is exactly the state
// the else-block expects on entry.
debug_assert!(matches!(self, &ControlStackFrame::If { .. }));
stack.truncate(self.original_stack_size());
}
/// Pop values from the value stack so that it is left at the state it was
/// before this control-flow frame.
pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
// The "If" frame pushes its parameters twice, so they're available to the else block
// (see also `FuncTranslationState::push_if`).
// Yet, the original_stack_size member accounts for them only once, so that the else
// block can see the same number of parameters as the consequent block. As a matter of
// fact, we need to subtract an extra number of parameter values for if blocks.
let num_duplicated_params = match self {
&ControlStackFrame::If {
num_param_values, ..
} => {
debug_assert!(num_param_values <= self.original_stack_size());
num_param_values
}
// Blocks and loops push their parameters only once.
_ => 0,
};
stack.truncate(self.original_stack_size() - num_duplicated_params);
}
}
/// Contains information passed along during a function's translation and that records: