Mirror of https://github.com/mozilla/gecko-dev.git

Backed out 3 changesets (bug 1641504) for valgrind bustages. CLOSED TREE

Backed out changeset 95646dbd26a1 (bug 1641504)
Backed out changeset bf1919e75e65 (bug 1641504)
Backed out changeset dedeac296eaa (bug 1641504)

Parent: b3830ab8e3
Commit: d641e64a5e
@@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"

[source."https://github.com/badboy/failure"]
git = "https://github.com/badboy/failure"
@@ -763,22 +763,22 @@ dependencies = [

[[package]]
name = "cranelift-bforest"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
]

[[package]]
name = "cranelift-codegen"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"byteorder",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
"log",
"regalloc",
"smallvec",
@@ -788,17 +788,17 @@ dependencies = [

[[package]]
name = "cranelift-codegen-meta"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
]

[[package]]
name = "cranelift-codegen-shared"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"

[[package]]
name = "cranelift-entity"
@@ -807,13 +807,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9

[[package]]
name = "cranelift-entity"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"

[[package]]
name = "cranelift-frontend"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen",
"log",
@@ -823,15 +823,15 @@ dependencies = [

[[package]]
name = "cranelift-wasm"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
"cranelift-frontend",
"log",
"thiserror",
"wasmparser 0.57.0",
"wasmparser 0.51.4",
]

[[package]]
@@ -3956,9 +3956,9 @@ dependencies = [

[[package]]
name = "regalloc"
version = "0.0.25"
version = "0.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cca5b48c9db66c5ba084e4660b4c0cfe8b551a96074bc04b7c11de86ad0bf1f9"
checksum = "b27b256b41986ac5141b37b8bbba85d314fbf546c182eb255af6720e07e4f804"
dependencies = [
"log",
"rustc-hash",
@@ -5375,9 +5375,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"

[[package]]
name = "wasmparser"
version = "0.57.0"
version = "0.51.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"
checksum = "aeb1956b19469d1c5e63e459d29e7b5aa0f558d9f16fcef09736f8a265e6c10a"

[[package]]
name = "wast"
@@ -76,8 +76,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f

[patch.crates-io.cranelift-codegen]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"

[patch.crates-io.cranelift-wasm]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
@@ -387,16 +387,8 @@ bool env_uses_shared_memory(const CraneliftModuleEnvironment* wrapper) {
return wrapper->env->usesSharedMemory();
}

size_t env_num_types(const CraneliftModuleEnvironment* wrapper) {
return wrapper->env->types.length();
}
const FuncTypeWithId* env_type(const CraneliftModuleEnvironment* wrapper,
size_t typeIndex) {
return &wrapper->env->types[typeIndex].funcType();
}

const FuncTypeWithId* env_func_sig(const CraneliftModuleEnvironment* wrapper,
size_t funcIndex) {
const FuncTypeWithId* env_function_signature(
const CraneliftModuleEnvironment* wrapper, size_t funcIndex) {
return wrapper->env->funcTypes[funcIndex];
}
@@ -163,16 +163,15 @@ bool wasm::CraneliftDisabledByFeatures(JSContext* cx, bool* isDisabled,
// no threads, no simd, and on ARM64, no reference types.
bool debug = cx->realm() && cx->realm()->debuggerObservesAsmJS();
bool gc = cx->options().wasmGc();
bool multiValue = WasmMultiValueFlag(cx);
bool threads =
cx->realm() &&
cx->realm()->creationOptions().getSharedMemoryAndAtomicsEnabled();
#if defined(JS_CODEGEN_ARM64)
bool reftypesOnArm64 = cx->options().wasmReftypes();
bool multiValue = false;
#else
// On other platforms, assume reftypes has been implemented.
bool reftypesOnArm64 = false;
bool multiValue = WasmMultiValueFlag(cx);
#endif
bool simd = WasmSimdFlag(cx);
if (reason) {
@@ -224,8 +223,8 @@ bool wasm::GcTypesAvailable(JSContext* cx) {
}

bool wasm::MultiValuesAvailable(JSContext* cx) {
return WasmMultiValueFlag(cx) &&
(BaselineAvailable(cx) || IonAvailable(cx) || CraneliftAvailable(cx));
// Cranelift does not support multi-value.
return WasmMultiValueFlag(cx) && (BaselineAvailable(cx) || IonAvailable(cx));
}

bool wasm::SimdAvailable(JSContext* cx) {
@@ -13,8 +13,8 @@ name = "baldrdash"
# cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
# you should update the following $TOP_LEVEL/Cargo.toml file: look for the
# revision (rev) hashes of both cranelift dependencies (codegen and wasm).
cranelift-codegen = { version = "0.64.0", default-features = false }
cranelift-wasm = "0.64.0"
cranelift-codegen = { version = "0.63.0", default-features = false }
cranelift-wasm = "0.63.0"
log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
env_logger = "0.6"
smallvec = "1.0"
@@ -212,10 +212,7 @@ extern "C" {
js::wasm::TypeCode env_unpack(BD_ValType type);

bool env_uses_shared_memory(const CraneliftModuleEnvironment* env);
size_t env_num_types(const CraneliftModuleEnvironment* env);
const js::wasm::FuncTypeWithId* env_type(const CraneliftModuleEnvironment* env,
size_t typeIndex);
const js::wasm::FuncTypeWithId* env_func_sig(
const js::wasm::FuncTypeWithId* env_function_signature(
const CraneliftModuleEnvironment* env, size_t funcIndex);
size_t env_func_import_tls_offset(const CraneliftModuleEnvironment* env,
size_t funcIndex);
@@ -24,7 +24,7 @@ use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
use cranelift_codegen::ir::{self, InstBuilder, SourceLoc};
use cranelift_codegen::isa;
use cranelift_wasm::{FuncIndex, GlobalIndex, SignatureIndex, TableIndex, WasmResult};
use cranelift_wasm::{FuncIndex, GlobalIndex, SignatureIndex, TableIndex, WasmError, WasmResult};

use smallvec::SmallVec;
@@ -171,6 +171,17 @@ impl FuncTypeWithId {
}
}

pub fn ret_type(self) -> WasmResult<Option<ir::Type>> {
match self.results() {
Ok(v) => match v.as_slice() {
[] => Ok(None),
[t] => Ok(Some(*t)),
_ => Err(WasmError::Unsupported("multiple values".to_string())),
},
Err(e) => Err(e),
}
}

pub fn id_kind(self) -> FuncTypeIdDescKind {
unsafe { low_level::funcType_idKind(self.0) }
}
@@ -198,14 +209,8 @@ impl<'a> ModuleEnvironment<'a> {
pub fn uses_shared_memory(&self) -> bool {
unsafe { low_level::env_uses_shared_memory(self.env) }
}
pub fn num_types(&self) -> usize {
unsafe { low_level::env_num_types(self.env) }
}
pub fn type_(&self, index: usize) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_type(self.env, index) })
}
pub fn func_sig(&self, func_index: FuncIndex) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_func_sig(self.env, func_index.index()) })
pub fn function_signature(&self, func_index: FuncIndex) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_function_signature(self.env, func_index.index()) })
}
pub fn func_import_tls_offset(&self, func_index: FuncIndex) -> usize {
unsafe { low_level::env_func_import_tls_offset(self.env, func_index.index()) }
@@ -28,7 +28,7 @@ use cranelift_codegen::binemit::{
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{
self, constant::ConstantOffset, stackslot::StackSize, ExternalName, JumpTable, SourceLoc,
TrapCode, Type,
TrapCode,
};
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::CodegenResult;
@@ -91,14 +91,12 @@ impl CompiledFunc {
pub struct BatchCompiler<'static_env, 'module_env> {
// Attributes that are constant accross multiple compilations.
static_environ: &'static_env bindings::StaticEnvironment,

environ: bindings::ModuleEnvironment<'module_env>,
module_state: ModuleTranslationState,

isa: Box<dyn TargetIsa>,

// Stateless attributes.
func_translator: FuncTranslator,
dummy_module_state: ModuleTranslationState,

// Mutable attributes.
/// Cranelift overall context.
@@ -126,7 +124,8 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
environ,
isa,
func_translator: FuncTranslator::new(),
module_state: create_module_translation_state(&environ)?,
// TODO for Cranelift to support multi-value, feed it the real type section here.
dummy_module_state: ModuleTranslationState::new(),
context: Context::new(),
trap_relocs: Traps::new(),
trans_env,
@@ -159,7 +158,7 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
self.context.func.name = wasm_function_name(index);

self.func_translator.translate(
&self.module_state,
&self.dummy_module_state,
func.bytecode(),
func.offset_in_module as usize,
&mut self.context.func,
@@ -301,27 +300,6 @@ impl<'static_env, 'module_env> fmt::Display for BatchCompiler<'static_env, 'modu
}
}

fn create_module_translation_state(
env: &bindings::ModuleEnvironment,
) -> WasmResult<ModuleTranslationState> {
let num_sig = env.num_types();

let mut arg_vecs = vec![];
let mut result_vecs = vec![];
for i in 0..num_sig {
let sig = env.type_(i);
arg_vecs.push(sig.args()?);
result_vecs.push(sig.results()?);
}
let types: Vec<(&[Type], &[Type])> = arg_vecs
.iter()
.zip(result_vecs.iter())
.map(|(args, results)| (&args[..], &results[..]))
.collect();

ModuleTranslationState::from_func_sigs(&types[..])
}

/// Create a Cranelift function name representing a WebAssembly function with `index`.
pub fn wasm_function_name(func: FuncIndex) -> ExternalName {
ExternalName::User {
@@ -538,7 +516,7 @@ impl TrapSink for Traps {
// entries, so we don't have to.
return;
}
HeapOutOfBounds | TableOutOfBounds => bindings::Trap::OutOfBounds,
HeapOutOfBounds | OutOfBounds | TableOutOfBounds => bindings::Trap::OutOfBounds,
IndirectCallToNull => bindings::Trap::IndirectCallToNull,
BadSignature => bindings::Trap::IndirectCallBadSig,
IntegerOverflow => bindings::Trap::IntegerOverflow,
@@ -69,7 +69,7 @@ fn init_sig_from_wsig(
sig.params.push(ir::AbiParam::new(arg));
}

for ret_type in wsig.results()? {
if let Some(ret_type) = wsig.ret_type()? {
let ret = match ret_type {
// Spidermonkey requires i32 returns to have their high 32 bits
// zero so that it can directly box them.
@@ -95,7 +95,7 @@ pub fn init_sig(
call_conv: CallConv,
func_index: FuncIndex,
) -> WasmResult<ir::Signature> {
let wsig = env.func_sig(func_index);
let wsig = env.function_signature(func_index);
init_sig_from_wsig(call_conv, wsig)
}
@@ -858,7 +858,7 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, callee, tlength);
pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);
pos.ins().trapnz(oob, ir::TrapCode::OutOfBounds);

// 3. Load the wtable base pointer from a global.
let tbase = pos.ins().global_value(POINTER_TYPE, base_gv);
@@ -1084,11 +1084,11 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_grow(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
delta: ir::Value,
init_value: ir::Value,
) -> WasmResult<ir::Value> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
Ok(self
.instance_call(&mut pos, &FN_TABLE_GROW, &[init_value, delta, table_index])
.unwrap())
@@ -1097,10 +1097,10 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
index: ir::Value,
) -> WasmResult<ir::Value> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
Ok(self
.instance_call(&mut pos, &FN_TABLE_GET, &[index, table_index])
.unwrap())
@@ -1109,11 +1109,11 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_set(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
value: ir::Value,
index: ir::Value,
) -> WasmResult<()> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
self.instance_call(&mut pos, &FN_TABLE_SET, &[index, value, table_index]);
Ok(())
}
@@ -1146,12 +1146,12 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_fill(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
dst: ir::Value,
val: ir::Value,
len: ir::Value,
) -> WasmResult<()> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
self.instance_call(&mut pos, &FN_TABLE_FILL, &[dst, val, len, table_index]);
Ok(())
}
@@ -1 +1 @@
{"files":{"Cargo.toml":"fe108380fdfaac0d92a92302d0751df182b888e874e56e465f4241dbb670a92e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"07d7670bb6f0c26fa3abb5d547d645b8b6ab32378dba33e3453122c8ba59c6b5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.64.0"
version = "0.63.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"

[dependencies]
cranelift-entity = { path = "../entity", version = "0.64.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.63.0", default-features = false }

[badges]
maintenance = { status = "experimental" }
@@ -1 +1 @@
{"files":{"Cargo.toml":"a19ba59829e25d67120787a454038986a6759f7d592dcf427924ebbcb5de6697","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"87c70a4856bb1c40ba6babed549aa7e01478375244dea605be0334ae6d0441e0","src/isa/x86/instructions.rs":"a2c81ff80e30980fe444aa1e56ba57c54911cee67c392c16bfbdf28f75151dc6","src/isa/x86/legalize.rs":"b5f68ea089c4237c7140ef0b8ff71f7c6a5f53884bf2158d81b52d3750bcacac","src/isa/x86/mod.rs":"ecc1d4de51bd44dbaa864fafebb68f66bc99fb8c9ad67a0fcb420bd1f87d1524","src/isa/x86/opcodes.rs":"f98dd104910efbfa3c211080c68a17da607ce585b9d81bf22cb255e58e51f99f","src/isa/x86/recipes.rs":"b71a3746ed39b08932dc1a0ce885b61eec2e8daf2e92d12eccc0d085e4587a1f","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"69623c2193458c838617e52e88d3ff91b71f3f07aec1f1494c0cabd7c332ad49","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38b9a3b09bd86d020b841abe94eef003063b2cb12d9dc991a7743b2cc0bb3362","src/shared/legalize.rs":"55b186e09383cc16491a6a0dd79aa9149c1aba1927a7173701478818b8116795","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"0b4f903de5f2df19304c44bf4bd456c3a8e165103b38ccb13b6f88ae8a3c7ee8","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"2d1fae4231bb7d3c43ebcaccbc62d243440ab537a5b6bd40c653ece0bcda5a75","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"b275053977c0239211c1df35253154ba4dce2519f506088e71104de37d3db862","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"fd5a7418fa0d47cdf1b823b31553f1549c03e160ffffac9e22d611185774367e","src/isa/x86/encodings.rs":"a19e5dd7ba7fe74f2ec0a2367e61e2dab498113f8b2a2f1bc677b6ee486358d5","src/isa/x86/instructions.rs":"144e83591444115f2ab8d16777e322eb5c9d8eef123ad05d0c66811a029b662b","src/isa/x86/legalize.rs":"d2eb6cee5c885870250417f4d9086527c96f994542c9316baf14776b500e45b0","src/isa/x86/mod.rs":"65953f998ff3fc3b333167e9979fc0f15f976b51ad75272ac19dcaad0981b371","src/isa/x86/opcodes.rs":"44556abfc4a319a6e48aa878f10550b7878725ba0bf75ddc9bb6a0e6f4223c73","src/isa/x86/recipes.rs":"f142ae4ea1db29df0f3c9aedf0c5ee228682136526499f0c85aab101375d0c8c","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"49abb46533b3a5415cd033e0a98b5c9561e231f2dd9510d587dc69b204bb6706","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"89ed4074f748637adf56b93ba952e398c45d43e6326d01676885939e3fe8bc4a","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"8df3abeb47b52b7dc99f6e0bb16cf8a695ce4fe0a8d86035945a2612d1aa5a6d","src/shared/legalize.rs":"bc9c3292446c1d338df1c4ce19f3ac5482cfe582a04a5a1e82fc9aaa6aef25ea","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"9460758f04ccfc9129ea4d4081571fe4a3ac574c3d25b6473f888fbbb506b9d3","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
@@ -1,19 +1,19 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.64.0"
version = "0.63.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"

[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]

[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.64.0" }
cranelift-entity = { path = "../../entity", version = "0.64.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.63.0" }
cranelift-entity = { path = "../../entity", version = "0.63.0" }

[badges]
maintenance = { status = "experimental" }

[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
@@ -874,32 +874,17 @@ fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) {
args.join(", ")
);

let imms_need_sign_extension = format
.imm_fields
.iter()
.any(|f| f.kind.rust_type == "ir::immediates::Imm64");

fmt.doc_comment(format.to_string());
fmt.line("#[allow(non_snake_case)]");
fmtln!(fmt, "fn {} {{", proto);
fmt.indent(|fmt| {
// Generate the instruction data.
fmtln!(
fmt,
"let{} data = ir::InstructionData::{} {{",
if imms_need_sign_extension { " mut" } else { "" },
format.name
);
fmtln!(fmt, "let data = ir::InstructionData::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode,");
gen_member_inits(format, fmt);
});
fmtln!(fmt, "};");

if imms_need_sign_extension {
fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);");
}

fmt.line("self.build(data, ctrl_typevar)");
});
fmtln!(fmt, "}");
@@ -64,7 +64,7 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG

// R-type with an immediate shift amount instead of rs2.
recipes.push(
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
@@ -79,11 +79,11 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
);

recipes.push(
EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm64,
&*formats.binary_imm,
"imm",
12,
0,
@@ -689,12 +689,6 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
}
}
}
for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
e.enc_both(
bint.bind(*to).bind(*from),
rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
);
}

// Copy Special
// For x86-64, only define REX forms for now, since we can't describe the
@@ -1454,7 +1448,6 @@ fn define_alu(
// x86 has a bitwise not instruction NOT.
e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));

// Also add a `b1` encodings for the logic instructions.
// TODO: Should this be done with 8-bit instructions? It would improve partial register
@@ -1494,13 +1487,8 @@ fn define_alu(
for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
// Cannot use enc_i32_i64 for this pattern because instructions require
// to bind any.
e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
e.enc32(
inst.bind(I32).bind(I16),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc32(
inst.bind(I32).bind(I32),
inst.bind(I32).bind(Any),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc64(
@@ -1613,11 +1601,8 @@ fn define_simd(
let sadd_sat = shared.by_name("sadd_sat");
let scalar_to_vector = shared.by_name("scalar_to_vector");
let sload8x8 = shared.by_name("sload8x8");
let sload8x8_complex = shared.by_name("sload8x8_complex");
let sload16x4 = shared.by_name("sload16x4");
let sload16x4_complex = shared.by_name("sload16x4_complex");
let sload32x2 = shared.by_name("sload32x2");
let sload32x2_complex = shared.by_name("sload32x2_complex");
let spill = shared.by_name("spill");
let sqrt = shared.by_name("sqrt");
let sshr_imm = shared.by_name("sshr_imm");
@@ -1626,15 +1611,11 @@ fn define_simd(
let store_complex = shared.by_name("store_complex");
let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
let uload16x4 = shared.by_name("uload16x4");
let uload16x4_complex = shared.by_name("uload16x4_complex");
let uload32x2 = shared.by_name("uload32x2");
let uload32x2_complex = shared.by_name("uload32x2_complex");
let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let x86_insertps = x86.by_name("x86_insertps");
let x86_movlhps = x86.by_name("x86_movlhps");
let x86_movsd = x86.by_name("x86_movsd");
@@ -1645,8 +1626,6 @@ fn define_simd(
let x86_pmaxu = x86.by_name("x86_pmaxu");
let x86_pmins = x86.by_name("x86_pmins");
let x86_pminu = x86.by_name("x86_pminu");
let x86_pmullq = x86.by_name("x86_pmullq");
let x86_pmuludq = x86.by_name("x86_pmuludq");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_psll = x86.by_name("x86_psll");
@@ -1657,7 +1636,6 @@ fn define_simd(
let x86_punpckl = x86.by_name("x86_punpckl");

// Shorthands for recipes.
let rec_blend = r.template("blend");
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
let rec_f_ib = r.template("f_ib");
let rec_fa = r.template("fa");
@@ -1727,20 +1705,6 @@ fn define_simd(
e.enc_both_inferred(instruction, template);
}

// SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
// either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB;
// for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let opcode = match ty.lane_bits() {
32 => &BLENDVPS,
64 => &BLENDVPD,
_ => &PBLENDVB,
};
let instruction = vselect.bind(vector(ty, sse_vector_size));
let template = rec_blend.opcodes(opcode);
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
}

// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
@@ -2013,35 +1977,6 @@ fn define_simd(
}
}

// SIMD load extend (complex addressing)
let is_load_complex_length_two =
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
for (inst, opcodes) in &[
(uload8x8_complex, &PMOVZXBW),
(uload16x4_complex, &PMOVZXWD),
(uload32x2_complex, &PMOVZXDQ),
(sload8x8_complex, &PMOVSXBW),
(sload16x4_complex, &PMOVSXWD),
(sload32x2_complex, &PMOVSXDQ),
] {
for recipe in &[
rec_fldWithIndex,
rec_fldWithIndexDisp8,
rec_fldWithIndexDisp32,
] {
let template = recipe.opcodes(*opcodes);
let predicate = |encoding: EncodingBuilder| {
encoding
.isa_predicate(use_sse41_simd)
.inst_predicate(is_load_complex_length_two.clone())
};
e.enc32_func(inst.clone(), template.clone(), predicate);
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
e.enc64_func(inst.clone(), template.rex(), predicate);
e.enc64_func(inst.clone(), template, predicate);
}
}

// SIMD integer addition
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
let iadd = iadd.bind(vector(*ty, sse_vector_size));
@@ -2101,14 +2036,12 @@ fn define_simd(
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
}

// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));

// SIMD integer multiplication for I64x2 using a AVX512.
{
let imul = imul.bind(vector(I64, sse_vector_size));
e.enc_32_64_maybe_isap(
x86_pmullq,
rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
imul,
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
);
}
@@ -2184,11 +2117,8 @@ fn define_simd(
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));

// One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set.
if *ty != I64 {
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}

// SIMD integer comparisons
@@ -2293,7 +2223,8 @@ fn define_entity_ref(
let rec_gvaddr8 = r.template("gvaddr8");
let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
let rec_spaddr_id = r.template("spaddr_id");
let rec_spaddr4_id = r.template("spaddr4_id");
let rec_spaddr8_id = r.template("spaddr8_id");

// Predicates shorthands.
let all_ones_funcaddrs_and_not_is_pic =
@@ -2381,8 +2312,8 @@ fn define_entity_ref(
//
// TODO: Add encoding rules for stack_load and stack_store, so that they
// don't get legalized to stack_addr + load/store.
e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());

// Constant addresses (PIC).
e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
@@ -283,7 +283,7 @@ pub(crate) fn define(
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
register and re-orders the data according to the passed immediate byte.
"#,
&formats.binary_imm8,
&formats.extract_lane,
)
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
.operands_out(vec![a]),
@@ -314,7 +314,7 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.binary_imm8,
&formats.extract_lane,
)
.operands_in(vec![x, Idx])
.operands_out(vec![a]),
@@ -342,9 +342,9 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.ternary_imm8,
&formats.insert_lane,
)
.operands_in(vec![x, y, Idx])
.operands_in(vec![x, Idx, y])
.operands_out(vec![a]),
);
@@ -369,9 +369,9 @@ pub(crate) fn define(
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
floats, which are already stored in an XMM register.
"#,
&formats.ternary_imm8,
&formats.insert_lane,
)
.operands_in(vec![x, y, Idx])
.operands_in(vec![x, Idx, y])
.operands_out(vec![a]),
);
@@ -475,11 +475,10 @@ pub(crate) fn define(
.includes_scalars(false)
.build(),
);
let I128 = &TypeVar::new(
"I128",
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
upper lane is concatenated with the lower lane to form the integer)",
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
the lower lane to form the integer)",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
@@ -488,7 +487,7 @@ pub(crate) fn define(
);

let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
let a = &Operand::new("a", IxN);

ig.push(
@@ -533,47 +532,6 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing two 64-bit integers",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I64x2);
let y = &Operand::new("y", I64x2);
let a = &Operand::new("a", I64x2);
ig.push(
Inst::new(
"x86_pmullq",
r#"
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
encodings for CPUs with newer vector features.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"x86_pmuludq",
r#"
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
overflow as in `x86_pmullq`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let f = &Operand::new("f", iflags);
@@ -8,7 +8,7 @@ use crate::shared::Definitions as SharedDefinitions;

#[allow(clippy::many_single_char_names)]
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let mut expand = TransformGroupBuilder::new(
let mut group = TransformGroupBuilder::new(
"x86_expand",
r#"
Legalize instructions by expansion.
@@ -18,37 +18,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
.isa("x86")
.chain_with(shared.transform_groups.by_name("expand_flags").id);

let mut narrow = TransformGroupBuilder::new(
"x86_narrow",
r#"
Legalize instructions by narrowing.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("narrow_flags").id);

let mut narrow_avx = TransformGroupBuilder::new(
"x86_narrow_avx",
r#"
Legalize instructions by narrowing with CPU feature checks.

This special case converts using x86 AVX instructions where available."#,
)
.isa("x86");
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
// function for where this is chained.

let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);

// List of instructions.
let insts = &shared.instructions;
let band = insts.by_name("band");
@@ -68,8 +37,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let isub = insts.by_name("isub");
let ishl = insts.by_name("ishl");
let ireduce = insts.by_name("ireduce");
let popcnt = insts.by_name("popcnt");
let sdiv = insts.by_name("sdiv");
let selectif = insts.by_name("selectif");
@@ -78,7 +45,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let tls_value = insts.by_name("tls_value");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr = insts.by_name("ushr");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
@@ -89,40 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

let imm = &shared.imm;

// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
// not encodable.
let a = var("a");
let x = var("x");
let y = var("y");
let z = var("z");

for &ty in &[I8, I16, I32] {
let ishl_by_i64 = ishl.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ishl_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}

for &ty in &[I8, I16, I32] {
let ushr_by_i64 = ushr.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ushr_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}

// Division and remainder.
//
// The srem expansion requires custom code because srem INT_MIN, -1 is not
// allowed to trap. The other ops need to check avoid_div_traps.
expand.custom_legalize(sdiv, "expand_sdivrem");
expand.custom_legalize(srem, "expand_sdivrem");
expand.custom_legalize(udiv, "expand_udivrem");
expand.custom_legalize(urem, "expand_udivrem");
group.custom_legalize(sdiv, "expand_sdivrem");
group.custom_legalize(srem, "expand_sdivrem");
group.custom_legalize(udiv, "expand_udivrem");
group.custom_legalize(urem, "expand_udivrem");

// Double length (widening) multiplication.
let a = var("a");
@@ -133,12 +73,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let res_lo = var("res_lo");
let res_hi = var("res_hi");

expand.legalize(
group.legalize(
def!(res_hi = umulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
);

expand.legalize(
group.legalize(
def!(res_hi = smulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
);
@@ -157,7 +97,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");

// Equality needs an explicit `ord` test which checks the parity bit.
expand.legalize(
group.legalize(
def!(a = fcmp(floatcc_eq, x, y)),
vec![
def!(a1 = fcmp(floatcc_ord, x, y)),
@@ -165,7 +105,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
def!(a = band(a1, a2)),
],
);
expand.legalize(
group.legalize(
def!(a = fcmp(floatcc_ne, x, y)),
vec![
def!(a1 = fcmp(floatcc_uno, x, y)),
@@ -190,20 +130,20 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
(floatcc_ugt, floatcc_ult),
(floatcc_uge, floatcc_ule),
] {
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
}

// We need to modify the CFG for min/max legalization.
expand.custom_legalize(fmin, "expand_minmax");
expand.custom_legalize(fmax, "expand_minmax");
group.custom_legalize(fmin, "expand_minmax");
group.custom_legalize(fmax, "expand_minmax");

// Conversions from unsigned need special handling.
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
// Conversions from float to int can trap and modify the control flow graph.
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");

// Count leading and trailing zeroes, for baseline x86_64
let c_minus_one = var("c_minus_one");
@@ -218,7 +158,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
let imm64_63 = Literal::constant(&imm.imm64, 63);
expand.legalize(
group.legalize(
def!(a = clz.I64(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@@ -230,7 +170,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_31 = Literal::constant(&imm.imm64, 31);
expand.legalize(
group.legalize(
def!(a = clz.I32(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@@ -242,7 +182,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_64 = Literal::constant(&imm.imm64, 64);
expand.legalize(
group.legalize(
def!(a = ctz.I64(x)),
vec![
def!(c_sixty_four = iconst(imm64_64)),
@@ -252,7 +192,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_32 = Literal::constant(&imm.imm64, 32);
expand.legalize(
group.legalize(
def!(a = ctz.I32(x)),
vec![
def!(c_thirty_two = iconst(imm64_32)),
@@ -285,7 +225,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

let imm64_1 = Literal::constant(&imm.imm64, 1);
let imm64_4 = Literal::constant(&imm.imm64, 4);
expand.legalize(
group.legalize(
def!(r = popcnt.I64(x)),
vec![
def!(qv3 = ushr_imm(x, imm64_1)),
@@ -326,7 +266,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let lc0F = var("lc0F");
let lc01 = var("lc01");

expand.legalize(
group.legalize(
def!(r = popcnt.I32(x)),
vec![
def!(lv3 = ushr_imm(x, imm64_1)),
@@ -349,27 +289,31 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
],
);

expand.custom_legalize(ineg, "convert_ineg");
expand.custom_legalize(tls_value, "expand_tls_value");
group.custom_legalize(ineg, "convert_ineg");

group.custom_legalize(tls_value, "expand_tls_value");

group.build_and_add_to(&mut shared.transform_groups);

let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);

widen.custom_legalize(ineg, "convert_ineg");

// To reduce compilation times, separate out large blocks of legalizations by theme.
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);

expand.build_and_add_to(&mut shared.transform_groups);
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
narrow_avx
.chain_with(narrow_id)
.build_and_add_to(&mut shared.transform_groups);
widen.build_and_add_to(&mut shared.transform_groups);

// To reduce compilation times, separate out large blocks of legalizations by
// theme.
define_simd(shared, x86_instructions);
}

fn define_simd(
shared: &mut SharedDefinitions,
x86_instructions: &InstructionGroup,
narrow: &mut TransformGroupBuilder,
narrow_avx: &mut TransformGroupBuilder,
) {
fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let insts = &shared.instructions;
let band = insts.by_name("band");
let band_not = insts.by_name("band_not");
@@ -386,7 +330,6 @@ fn define_simd(
let icmp = insts.by_name("icmp");
let imax = insts.by_name("imax");
let imin = insts.by_name("imin");
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
@ -406,7 +349,6 @@ fn define_simd(
|
|||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_packss = x86_instructions.by_name("x86_packss");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
|
@ -422,6 +364,16 @@ fn define_simd(
|
|||
|
||||
let imm = &shared.imm;
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
// Set up variables and immediates.
|
||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||
|
@ -478,7 +430,7 @@ fn define_simd(
|
|||
// Move into the lowest 16 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Insert the value again but in the next lowest 16 bits.
|
||||
def!(b = insertlane(a, x, uimm8_one)),
|
||||
def!(b = insertlane(a, uimm8_one, x)),
|
||||
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
|
||||
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
|
@ -512,7 +464,7 @@ fn define_simd(
|
|||
// Move into the lowest 64 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Move into the highest 64 bits of the same XMM register.
|
||||
def!(y = insertlane(a, x, uimm8_one)),
|
||||
def!(y = insertlane(a, uimm8_one, x)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
@ -541,8 +493,8 @@ fn define_simd(
|
|||
);
|
||||
}
|
||||
|
||||
// SIMD shift right (arithmetic, i16x8 and i32x4)
|
||||
for ty in &[I16, I32] {
|
||||
// SIMD shift right (arithmetic)
|
||||
for ty in &[I16, I32, I64] {
|
||||
let sshr = sshr.bind(vector(*ty, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
narrow.legalize(
|
||||
|
@ -550,7 +502,6 @@ fn define_simd(
|
|||
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i8x16)
|
||||
{
|
||||
let sshr = sshr.bind(vector(I8, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
|
@ -575,25 +526,6 @@ fn define_simd(
|
|||
],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i64x2)
|
||||
{
|
||||
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
|
||||
let sshr_scalar_lane0 = sshr.bind(I64);
|
||||
let sshr_scalar_lane1 = sshr.bind(I64);
|
||||
narrow.legalize(
|
||||
def!(z = sshr_vector(x, y)),
|
||||
vec![
|
||||
// Use scalar operations to shift the first lane.
|
||||
def!(a = extractlane(x, uimm8_zero)),
|
||||
def!(b = sshr_scalar_lane0(a, y)),
|
||||
def!(c = insertlane(x, b, uimm8_zero)),
|
||||
// Do the same for the second lane.
|
||||
def!(d = extractlane(x, uimm8_one)),
|
||||
def!(e = sshr_scalar_lane1(d, y)),
|
||||
def!(z = insertlane(c, e, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD select
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
|
@ -608,17 +540,6 @@ fn define_simd(
|
|||
);
|
||||
}
|
||||
|
||||
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
|
||||
// This works, because each lane of boolean vector is filled with zeroes or ones.
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vselect = vselect.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(d = vselect(c, x, y)),
|
||||
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vany_true
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
|
@ -788,6 +709,5 @@ fn define_simd(
|
|||
narrow.custom_legalize(ushr, "convert_ushr");
|
||||
narrow.custom_legalize(ishl, "convert_ishl");
|
||||
|
||||
// This lives in the expand group to avoid conflicting with, e.g., i128 legalizations.
|
||||
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
||||
narrow.build_and_add_to(&mut shared.transform_groups);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
use crate::cdsl::cpu_modes::CpuMode;
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::types::{ReferenceType, VectorType};
use crate::cdsl::types::ReferenceType;

use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
@ -35,7 +35,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");

x86_32.legalize_monomorphic(expand_flags);
@ -47,7 +46,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);

x86_64.legalize_monomorphic(expand_flags);
x86_64.legalize_default(x86_narrow);
@ -59,7 +57,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);

let recipes = recipes::define(shared_defs, &settings, &regs);
@ -54,14 +54,6 @@ pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
/// Bit scan reverse (stores index of first encountered 1 from the back).
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];

/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];

/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];

/// Call near, relative, displacement relative to next instruction (sign-extended).
pub static CALL_RELATIVE: [u8; 1] = [0xe8];

@ -343,10 +335,6 @@ pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];

/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
/// in XMM0 and store the values into xmm1 (SSE4.1).
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];

/// Compare packed data for equal (SSE2).
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];

@ -471,11 +459,7 @@ pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];
@ -427,7 +427,6 @@ pub(crate) fn define<'shared>(
|
|||
let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
|
||||
let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
|
||||
let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
|
||||
let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
|
||||
|
||||
// Stack operand with a 32-bit signed displacement from either RBP or RSP.
|
||||
let stack_gpr32 = Stack::new(gpr);
|
||||
|
@ -608,12 +607,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r with FPR ins and outs. A form with a byte immediate.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
|
||||
.operands_in(vec![fpr, fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -621,7 +620,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -905,32 +904,14 @@ pub(crate) fn define<'shared>(
|
|||
.inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
|
||||
);
|
||||
|
||||
// XX /r for BLEND* instructions
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
|
||||
.operands_in(vec![
|
||||
OperandConstraint::FixedReg(reg_xmm0),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
])
|
||||
.operands_out(vec![2])
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
|
||||
modrm_rr(in_reg1, in_reg2, sink);
|
||||
"#,
|
||||
),
|
||||
"size_with_inferred_rex_for_inreg1_inreg2",
|
||||
);
|
||||
|
||||
// XX /n ib with 8-bit immediate sign-extended.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -947,11 +928,11 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -970,11 +951,11 @@ pub(crate) fn define<'shared>(
|
|||
// XX /n id with 32-bit immediate sign-extended.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
32,
|
||||
0,
|
||||
|
@ -996,20 +977,20 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![fpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.binary_imm8,
|
||||
"imm",
|
||||
&*formats.extract_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
)) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate?
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1020,17 +1001,17 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![gpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.binary_imm8, "imm", 8, 0,
|
||||
&*formats.extract_lane, "lane", 8, 0,
|
||||
))
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
|
||||
modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
), "size_with_inferred_rex_for_inreg0_outreg0"
|
||||
|
@ -1040,12 +1021,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
|
||||
.operands_in(vec![fpr, gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -1053,7 +1034,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1451,7 +1432,23 @@ pub(crate) fn define<'shared>(
|
|||
// TODO Alternative forms for 8-bit immediates, when applicable.
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
|
||||
EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
let sp = StackRef::sp(stack_slot, &func.stack_slots);
|
||||
let base = stk_base(sp.base);
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, base), sink);
|
||||
modrm_sib_disp8(out_reg0, sink);
|
||||
sib_noindex(base, sink);
|
||||
let imm : i32 = offset.into();
|
||||
sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
|
@ -2874,12 +2871,12 @@ pub(crate) fn define<'shared>(
|
|||
|
||||
{
|
||||
let has_small_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0);
|
||||
|
||||
// XX /n, MI form with imm8.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_small_offset)
|
||||
|
@ -2897,12 +2894,12 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
let has_big_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0);
|
||||
|
||||
// XX /n, MI form with imm32.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_big_offset)
|
||||
|
|
|
@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");

settings.add_bool(
"use_new_backend",
"Whether to use the new codegen backend using the new isel",
false,
);

// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
@ -4,7 +4,7 @@ use std::rc::Rc;

pub(crate) struct Formats {
pub(crate) binary: Rc<InstructionFormat>,
pub(crate) binary_imm64: Rc<InstructionFormat>,
pub(crate) binary_imm: Rc<InstructionFormat>,
pub(crate) branch: Rc<InstructionFormat>,
pub(crate) branch_float: Rc<InstructionFormat>,
pub(crate) branch_icmp: Rc<InstructionFormat>,
@ -17,13 +17,14 @@ pub(crate) struct Formats {
pub(crate) cond_trap: Rc<InstructionFormat>,
pub(crate) copy_special: Rc<InstructionFormat>,
pub(crate) copy_to_ssa: Rc<InstructionFormat>,
pub(crate) binary_imm8: Rc<InstructionFormat>,
pub(crate) extract_lane: Rc<InstructionFormat>,
pub(crate) float_compare: Rc<InstructionFormat>,
pub(crate) float_cond: Rc<InstructionFormat>,
pub(crate) float_cond_trap: Rc<InstructionFormat>,
pub(crate) func_addr: Rc<InstructionFormat>,
pub(crate) heap_addr: Rc<InstructionFormat>,
pub(crate) indirect_jump: Rc<InstructionFormat>,
pub(crate) insert_lane: Rc<InstructionFormat>,
pub(crate) int_compare: Rc<InstructionFormat>,
pub(crate) int_compare_imm: Rc<InstructionFormat>,
pub(crate) int_cond: Rc<InstructionFormat>,
@ -44,7 +45,6 @@ pub(crate) struct Formats {
pub(crate) store_complex: Rc<InstructionFormat>,
pub(crate) table_addr: Rc<InstructionFormat>,
pub(crate) ternary: Rc<InstructionFormat>,
pub(crate) ternary_imm8: Rc<InstructionFormat>,
pub(crate) trap: Rc<InstructionFormat>,
pub(crate) unary: Rc<InstructionFormat>,
pub(crate) unary_bool: Rc<InstructionFormat>,
@ -76,9 +76,7 @@ impl Formats {

binary: Builder::new("Binary").value().value().build(),

binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),

binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(),

// The select instructions are controlled by the second VALUE operand.
// The first VALUE operand is the controlling flag which has a derived type.
@ -90,18 +88,23 @@ impl Formats {
.typevar_operand(1)
.build(),

ternary_imm8: Builder::new("TernaryImm8")
.value()
.imm(&imm.uimm8)
.value()
.build(),

// Catch-all for instructions with many outputs and inputs and no immediate
// operands.
multiary: Builder::new("MultiAry").varargs().build(),

nullary: Builder::new("NullAry").build(),

insert_lane: Builder::new("InsertLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.value()
.build(),

extract_lane: Builder::new("ExtractLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.build(),

shuffle: Builder::new("Shuffle")
.value()
.value()
@ -559,9 +559,9 @@ fn define_simd_lane_access(
|
|||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
&formats.insert_lane,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_in(vec![x, Idx, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
|
@ -579,7 +579,7 @@ fn define_simd_lane_access(
|
|||
may or may not be zeroed depending on the ISA but the type system should prevent using
|
||||
``a`` as anything other than the extracted value.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
&formats.extract_lane,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -1172,20 +1172,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8",
|
||||
|
@ -1200,20 +1186,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I32x4 = &TypeVar::new(
|
||||
"I32x4",
|
||||
"A SIMD vector with exactly 4 lanes of 32-bit values",
|
||||
|
@ -1229,7 +1201,7 @@ pub(crate) fn define(
|
|||
Inst::new(
|
||||
"uload16x4",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
vector.
|
||||
"#,
|
||||
&formats.load,
|
||||
|
@ -1239,20 +1211,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4",
|
||||
|
@ -1267,20 +1225,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector with exactly 2 lanes of 64-bit values",
|
||||
|
@ -1306,20 +1250,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2",
|
||||
|
@ -1334,20 +1264,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
|
||||
let a = &Operand::new("a", Mem).with_doc("Value loaded");
|
||||
let Offset =
|
||||
|
@ -2215,7 +2131,7 @@ pub(crate) fn define(
|
|||
Like `icmp_imm`, but returns integer CPU flags instead of testing
|
||||
a specific condition code.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![f]),
|
||||
|
@ -2265,7 +2181,7 @@ pub(crate) fn define(
|
|||
This is similar to `iadd` but the operands are interpreted as signed integers and their
|
||||
summed result, instead of wrapping, will be saturated to the lowest or highest
|
||||
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
|
||||
since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
since an `iadd_ssat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
clamped to `0x7F`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
|
@ -2460,7 +2376,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2475,7 +2391,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2489,7 +2405,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2505,7 +2421,7 @@ pub(crate) fn define(
|
|||
representable in `B` bits two's complement. This only happens
|
||||
when `x = -2^{B-1}, Y = -1`.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2519,7 +2435,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2533,7 +2449,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2552,7 +2468,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2952,7 +2868,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2969,7 +2885,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2986,7 +2902,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3031,7 +2947,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate left by immediate.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3043,7 +2959,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate right by immediate.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3118,7 +3034,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of ``x``.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3132,7 +3048,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3146,7 +3062,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
|
|
@ -61,7 +61,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let cls = insts.by_name("cls");
|
||||
let clz = insts.by_name("clz");
|
||||
let ctz = insts.by_name("ctz");
|
||||
let copy = insts.by_name("copy");
|
||||
let fabs = insts.by_name("fabs");
|
||||
let f32const = insts.by_name("f32const");
|
||||
let f64const = insts.by_name("f64const");
|
||||
|
@ -199,6 +198,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let ah = var("ah");
|
||||
let cc = var("cc");
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
let ptr = var("ptr");
|
||||
let flags = var("flags");
|
||||
let offset = var("off");
|
||||
|
@ -211,8 +212,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
// embedded as part of arguments), so use a custom legalization for now.
|
||||
narrow.custom_legalize(iconst, "narrow_iconst");
|
||||
|
||||
for &(ty, ty_half) in &[(I128, I64), (I64, I32)] {
|
||||
let inst = uextend.bind(ty).bind(ty_half);
|
||||
{
|
||||
let inst = uextend.bind(I128).bind(I64);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
|
@ -222,12 +223,12 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] {
|
||||
let inst = sextend.bind(ty).bind(ty_half);
|
||||
{
|
||||
let inst = sextend.bind(I128).bind(I64);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, 63))), // splat sign bit to whole number
|
||||
def!(a = iconcat(x, ah)),
|
||||
],
|
||||
);
|
||||
|
@ -267,45 +268,39 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
],
|
||||
);
|
||||
|
||||
for &ty in &[I128, I64] {
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
narrow.legalize(
|
||||
def!(brz.I128(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brz.ty(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brnz.ty(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
}
|
||||
narrow.legalize(
|
||||
def!(brnz.I128(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(a = popcnt.I128(x)),
|
||||
|
@ -634,14 +629,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
for &(ty_half, ty) in &[(I64, I128), (I32, I64)] {
|
||||
let inst = ireduce.bind(ty_half).bind(ty);
|
||||
expand.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![def!((b, c) = isplit(x)), def!(a = copy(b))],
|
||||
);
|
||||
}
|
||||
|
||||
// Expand integer operations with carry for RISC architectures that don't have
|
||||
// the flags.
|
||||
let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult");
|
||||
|
|
|
@ -25,14 +25,11 @@ pub(crate) fn define() -> SettingGroup {
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
vec![
"backtracking",
"backtracking_checked",
"experimental_linear_scan",
"experimental_linear_scan_checked",
],
);
@ -1 +1 @@
{"files":{"Cargo.toml":"d3026bf5426d767b0b23f0a4f6272aaeb68f598a92f6c788c1f6948153fa63c3","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
{"files":{"Cargo.toml":"702a281a26cf7099e1b3ca5e8bea145c113f52242be4f1e7e5b06bf129092599","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.64.0"
version = "0.63.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.64.0"
version = "0.63.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@ -13,27 +13,25 @@ build = "build.rs"
edition = "2018"

[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.64.0" }
cranelift-entity = { path = "../entity", version = "0.64.0" }
cranelift-bforest = { path = "../bforest", version = "0.64.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.63.0" }
cranelift-entity = { path = "../entity", version = "0.63.0" }
cranelift-bforest = { path = "../bforest", version = "0.63.0" }
hashbrown = { version = "0.7", optional = true }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
gimli = { version = "0.21.0", default-features = false, features = ["write"], optional = true }
gimli = { version = "0.20.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.0.0" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.1.0" }
regalloc = "0.0.25"
regalloc = "0.0.21"
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accomodated in `tests`.

[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.64.0" }
peepmatic = { path = "../peepmatic", optional = true, version = "0.64.0" }
cranelift-codegen-meta = { path = "meta", version = "0.63.0" }

[features]
default = ["std", "unwind"]
@ -60,12 +58,10 @@ x86 = []
arm32 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.

# Option to enable all architectures.
all-arch = [
"x86",
"x64",
"arm32",
"arm64",
"riscv"
@ -74,12 +70,5 @@ all-arch = [
# For dependent crates that want to serialize some parts of cranelift
enable-serde = ["serde"]

# Recompile our optimizations that are written in the `peepmatic` DSL into a
# compact finite-state transducer automaton.
rebuild-peephole-optimizers = ["peepmatic"]

# Enable the use of `peepmatic`-generated peephole optimizers.
enable-peepmatic = ["peepmatic-runtime"]

[badges]
maintenance = { status = "experimental" }
@ -71,22 +71,4 @@ fn main() {
);
println!("cargo:warning=Generated files are in {}", out_dir);
}

#[cfg(feature = "rebuild-peephole-optimizers")]
rebuild_peephole_optimizers();
}

#[cfg(feature = "rebuild-peephole-optimizers")]
fn rebuild_peephole_optimizers() {
use std::path::Path;

let source_path = Path::new("src").join("preopt.peepmatic");
println!("cargo:rerun-if-changed={}", source_path.display());

let preopt =
peepmatic::compile_file(&source_path).expect("failed to compile `src/preopt.peepmatic`");

preopt
.serialize_to_file(&Path::new("src").join("preopt.serialized"))
.expect("failed to serialize peephole optimizer to `src/preopt.serialized`");
}
@ -54,9 +54,6 @@ pub enum ValueConversion {
|
|||
|
||||
/// Unsigned zero-extend value to the required type.
|
||||
Uext(Type),
|
||||
|
||||
/// Pass value by pointer of given integer type.
|
||||
Pointer(Type),
|
||||
}
|
||||
|
||||
impl ValueConversion {
|
||||
|
@ -66,7 +63,7 @@ impl ValueConversion {
|
|||
Self::IntSplit => ty.half_width().expect("Integer type too small to split"),
|
||||
Self::VectorSplit => ty.half_vector().expect("Not a vector"),
|
||||
Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
|
||||
Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty,
|
||||
Self::Sext(nty) | Self::Uext(nty) => nty,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,11 +74,6 @@ impl ValueConversion {
|
|||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a conversion to pointer?
|
||||
pub fn is_pointer(self) -> bool {
|
||||
matches!(self, Self::Pointer(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
|
||||
|
@ -118,16 +110,10 @@ pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<
|
|||
}
|
||||
// Split this argument into two smaller ones. Then revisit both.
|
||||
ArgAction::Convert(conv) => {
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"No more conversions allowed after conversion to pointer"
|
||||
);
|
||||
let value_type = conv.apply(arg.value_type);
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
args.to_mut()[argno].value_type = value_type;
|
||||
if conv.is_pointer() {
|
||||
args.to_mut()[argno].legalized_to_pointer = true;
|
||||
} else if conv.is_split() {
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
if conv.is_split() {
|
||||
args.to_mut().insert(argno + 1, new_arg);
|
||||
}
|
||||
}
|
||||
|
@ -166,10 +152,6 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
|||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
if arg.legalized_to_pointer {
|
||||
return ValueConversion::Pointer(arg.value_type);
|
||||
}
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
|
@ -244,12 +226,5 @@ mod tests {
|
|||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::IntBits
|
||||
);
|
||||
|
||||
// Value is passed by reference
|
||||
arg.legalized_to_pointer = true;
|
||||
assert_eq!(
|
||||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::Pointer(types::I32)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,7 @@ const NUM_BITS: usize = core::mem::size_of::<Num>() * 8;
/// The first value in the bitmap is of the lowest addressed slot on the stack.
/// As all stacks in Isa's supported by Cranelift grow down, this means that
/// first value is of the top of the stack and values proceed down the stack.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))]
#[derive(Clone, Debug)]
pub struct Stackmap {
bitmap: Vec<BitSet<Num>>,
mapped_words: u32,
@ -5,14 +5,12 @@
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.

use core::convert::{From, Into};
use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};

/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BitSet<T>(pub T);

impl<T> BitSet<T>
@ -27,7 +27,6 @@ use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
use crate::regalloc;
use crate::remove_constant_phis::do_remove_constant_phis;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
use crate::simple_gvn::do_simple_gvn;
@ -180,8 +179,6 @@ impl Context {
self.dce(isa)?;
}

self.remove_constant_phis(isa)?;

if let Some(backend) = isa.get_mach_backend() {
let result = backend.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
@ -227,7 +224,7 @@ impl Context {
let _tt = timing::binemit();
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
if let Some(ref result) = &self.mach_compile_result {
result.buffer.emit(&mut sink);
result.sections.emit(&mut sink);
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
}
@ -295,16 +292,6 @@ impl Context {
Ok(())
}

/// Perform constant-phi removal on the function.
pub fn remove_constant_phis<'a, FOI: Into<FlagsOrIsa<'a>>>(
&mut self,
fisa: FOI,
) -> CodegenResult<()> {
do_remove_constant_phis(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}

/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
do_preopt(&mut self.func, &mut self.cfg, isa);
@ -794,20 +794,15 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}

// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
// during lowering.
if self.isa.get_mach_backend().is_none() {
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}

&mut self.func.dfg
@ -40,24 +40,3 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let opcode = data.opcode();
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}

/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load?
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
}

/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
/// represented in 64 bits?
pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
let data = &func.dfg[inst];
if data.opcode() == Opcode::Null {
return Some(0);
}
match data {
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
_ => None,
}
}
@ -234,7 +234,11 @@ impl DataFlowGraph {
|
|||
|
||||
/// Get the type of a value.
|
||||
pub fn value_type(&self, v: Value) -> Type {
|
||||
self.values[v].ty()
|
||||
match self.values[v] {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the definition of a value.
|
||||
|
@ -379,14 +383,9 @@ pub enum ValueDef {
|
|||
impl ValueDef {
|
||||
/// Unwrap the instruction where the value was defined, or panic.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().expect("Value is not an instruction result")
|
||||
}
|
||||
|
||||
/// Get the instruction where the value was defined, if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Result(inst, _) => Some(inst),
|
||||
_ => None,
|
||||
Self::Result(inst, _) => inst,
|
||||
_ => panic!("Value is not an instruction result"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -429,16 +428,6 @@ enum ValueData {
|
|||
Alias { ty: Type, original: Value },
|
||||
}
|
||||
|
||||
impl ValueData {
|
||||
fn ty(&self) -> Type {
|
||||
match *self {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Instructions.
|
||||
///
|
||||
impl DataFlowGraph {
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
|
||||
use crate::isa::{CallConv, RegInfo, RegUnit};
|
||||
use crate::machinst::RelocDistance;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt;
|
||||
use core::str::FromStr;
|
||||
|
@ -156,8 +155,6 @@ pub struct AbiParam {
|
|||
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
|
||||
/// been legalized.
|
||||
pub location: ArgumentLoc,
|
||||
/// Was the argument converted to pointer during legalization?
|
||||
pub legalized_to_pointer: bool,
|
||||
}
|
||||
|
||||
impl AbiParam {
|
||||
|
@ -168,7 +165,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose: ArgumentPurpose::Normal,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -179,7 +175,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -190,7 +185,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: ArgumentLoc::Reg(regunit),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -224,9 +218,6 @@ pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
|
|||
impl<'a> fmt::Display for DisplayAbiParam<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0.value_type)?;
|
||||
if self.0.legalized_to_pointer {
|
||||
write!(f, " ptr")?;
|
||||
}
|
||||
match self.0.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => write!(f, " uext")?,
|
||||
|
@ -375,16 +366,6 @@ pub struct ExtFuncData {
|
|||
/// Will this function be defined nearby, such that it will always be a certain distance away,
|
||||
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
|
||||
/// symbols meant to be preemptible cannot be considered colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range. The exact
|
||||
/// distance depends on the code model in use. Currently on AArch64, for example, Cranelift
|
||||
/// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how
|
||||
/// far away the target will be, it is best not to set the `colocated` flag; in general, this
|
||||
/// flag is best used when the target is known to be in the same unit of code generation, such
|
||||
/// as a Wasm module.
|
||||
///
|
||||
/// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A
|
||||
/// `colocated` flag value of `true` implies `RelocDistance::Near`.
|
||||
pub colocated: bool,
|
||||
}
|
||||
|
||||
|
@ -397,17 +378,6 @@ impl fmt::Display for ExtFuncData {
|
|||
}
|
||||
}
|
||||
|
||||
impl ExtFuncData {
|
||||
/// Return an estimate of the distance to the referred-to function symbol.
|
||||
pub fn reloc_distance(&self) -> RelocDistance {
|
||||
if self.colocated {
|
||||
RelocDistance::Near
|
||||
} else {
|
||||
RelocDistance::Far
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -423,8 +393,6 @@ mod tests {
|
|||
assert_eq!(t.sext().to_string(), "i32 sext");
|
||||
t.purpose = ArgumentPurpose::StructReturn;
|
||||
assert_eq!(t.to_string(), "i32 uext sret");
|
||||
t.legalized_to_pointer = true;
|
||||
assert_eq!(t.to_string(), "i32 ptr uext sret");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -308,30 +308,6 @@ impl Function {
// function, assume it is not a leaf.
self.dfg.signatures.is_empty()
}

/// Replace the `dst` instruction's data with the `src` instruction's data
/// and then remove `src`.
///
/// `src` and its result values should not be used at all, as any uses would
/// be left dangling after calling this method.
///
/// `src` and `dst` must have the same number of resulting values, and
/// `src`'s i^th value must have the same type as `dst`'s i^th value.
pub fn transplant_inst(&mut self, dst: Inst, src: Inst) {
debug_assert_eq!(
self.dfg.inst_results(dst).len(),
self.dfg.inst_results(src).len()
);
debug_assert!(self
.dfg
.inst_results(dst)
.iter()
.zip(self.dfg.inst_results(src))
.all(|(a, b)| self.dfg.value_type(*a) == self.dfg.value_type(*b)));

self.dfg[dst] = self.dfg[src].clone();
self.layout.remove_inst(src);
}
}

/// Additional annotations for function display.
@ -3,7 +3,6 @@
|
|||
use crate::ir::immediates::{Imm64, Offset32};
|
||||
use crate::ir::{ExternalName, GlobalValue, Type};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::machinst::RelocDistance;
|
||||
use core::fmt;
|
||||
|
||||
/// Information about a global value declaration.
|
||||
|
@ -63,10 +62,6 @@ pub enum GlobalValueData {
|
|||
/// Will this symbol be defined nearby, such that it will always be a certain distance
|
||||
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
|
||||
/// symbols meant to be preemptible cannot be colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range: for example,
|
||||
/// a +/- 2^27-byte range on AArch64. See the documentation for
|
||||
/// [`RelocDistance`](machinst::RelocDistance) for more details.
|
||||
colocated: bool,
|
||||
|
||||
/// Does this symbol refer to a thread local storage value?
|
||||
|
@ -90,20 +85,6 @@ impl GlobalValueData {
|
|||
Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this global references a symbol, return an estimate of the relocation distance,
|
||||
/// based on the `colocated` flag.
|
||||
pub fn maybe_reloc_distance(&self) -> Option<RelocDistance> {
|
||||
match self {
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: true, ..
|
||||
} => Some(RelocDistance::Near),
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: false, ..
|
||||
} => Some(RelocDistance::Far),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GlobalValueData {
|
||||
|
|
|
@ -62,21 +62,6 @@ impl Imm64 {
|
|||
pub fn bits(&self) -> i64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Sign extend this immediate as if it were a signed integer of the given
|
||||
/// power-of-two width.
|
||||
pub fn sign_extend_from_width(&mut self, bit_width: u16) {
|
||||
debug_assert!(bit_width.is_power_of_two());
|
||||
|
||||
if bit_width >= 64 {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = bit_width as i64;
|
||||
let delta = 64 - bit_width;
|
||||
let sign_extended = (self.0 << delta) >> delta;
|
||||
*self = Imm64(sign_extended);
|
||||
}
|
||||
}
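// A standalone sketch of the shift trick used by `sign_extend_from_width`
// above (not the crate's `Imm64` itself): shifting left by `64 - bit_width`
// and then arithmetic-shifting back replicates the sign bit of the narrow
// value into the upper bits.
fn sign_extend_from_width(value: i64, bit_width: u16) -> i64 {
    debug_assert!(bit_width.is_power_of_two());
    if bit_width >= 64 {
        return value;
    }
    let delta = 64 - i64::from(bit_width);
    (value << delta) >> delta
}

fn main() {
    // 0xFF viewed as an 8-bit signed value is -1.
    assert_eq!(sign_extend_from_width(0xFF, 8), -1);
    // 0x7F has a clear sign bit at width 8, so it is unchanged.
    assert_eq!(sign_extend_from_width(0x7F, 8), 0x7F);
    // 0x8000 viewed as a 16-bit signed value is -32768.
    assert_eq!(sign_extend_from_width(0x8000, 16), -32768);
}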
|
||||
|
||||
impl Into<i64> for Imm64 {
|
||||
|
|
|
@ -11,7 +11,9 @@ use core::fmt::{self, Display, Formatter};
|
|||
use core::ops::{Deref, DerefMut};
|
||||
use core::str::FromStr;
|
||||
|
||||
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::ir;
|
||||
use crate::ir::types;
|
||||
use crate::ir::{Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::isa;
|
||||
|
||||
use crate::bitset::BitSet;
|
||||
|
@ -255,30 +257,6 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get its trap code. Otherwise, return
|
||||
/// `None`.
|
||||
pub fn trap_code(&self) -> Option<TrapCode> {
|
||||
match *self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get an exclusive reference to its
|
||||
/// trap code. Otherwise, return `None`.
|
||||
pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> {
|
||||
match self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return information about a call instruction.
|
||||
///
|
||||
/// Any instruction that can call another function reveals its call signature here.
|
||||
|
@ -296,39 +274,6 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn sign_extend_immediates(&mut self, ctrl_typevar: Type) {
|
||||
if ctrl_typevar.is_invalid() {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = ctrl_typevar.bits();
|
||||
|
||||
match self {
|
||||
Self::BinaryImm64 {
|
||||
opcode,
|
||||
arg: _,
|
||||
imm,
|
||||
} => {
|
||||
if matches!(opcode, Opcode::SdivImm | Opcode::SremImm) {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
Self::IntCompareImm {
|
||||
opcode,
|
||||
arg: _,
|
||||
cond,
|
||||
imm,
|
||||
} => {
|
||||
debug_assert_eq!(*opcode, Opcode::IcmpImm);
|
||||
if cond.unsigned() != *cond {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
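// A rough standalone illustration (simplified names, not the crate's real
// `InstructionData`) of why `sign_extend_immediates` above only touches
// signed operations: for an 8-bit `icmp_imm` with a signed condition and the
// raw immediate 255, the value must be reinterpreted as -1 before forming a
// correct 64-bit signed comparison, while an unsigned condition wants 255.
fn sext(value: i64, bit_width: u32) -> i64 {
    let delta = 64 - bit_width;
    (value << delta) >> delta
}

fn immediate_for_compare(raw_imm: i64, bit_width: u32, signed_cond: bool) -> i64 {
    if signed_cond {
        sext(raw_imm, bit_width)
    } else {
        raw_imm
    }
}

fn main() {
    assert_eq!(immediate_for_compare(0xFF, 8, true), -1);   // signed view of 0xFF
    assert_eq!(immediate_for_compare(0xFF, 8, false), 0xFF); // unsigned view
}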
|
||||
|
||||
/// Information about branch and jump instructions.
|
||||
|
|
|
@ -24,20 +24,6 @@ pub enum LibCall {
|
|||
/// probe for stack overflow. These are emitted for functions which need them
|
||||
/// when the `enable_probestack` setting is true.
|
||||
Probestack,
|
||||
/// udiv.i64
|
||||
UdivI64,
|
||||
/// sdiv.i64
|
||||
SdivI64,
|
||||
/// urem.i64
|
||||
UremI64,
|
||||
/// srem.i64
|
||||
SremI64,
|
||||
/// ishl.i64
|
||||
IshlI64,
|
||||
/// ushr.i64
|
||||
UshrI64,
|
||||
/// sshr.i64
|
||||
SshrI64,
|
||||
/// ceil.f32
|
||||
CeilF32,
|
||||
/// ceil.f64
|
||||
|
@ -77,13 +63,6 @@ impl FromStr for LibCall {
|
|||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"Probestack" => Ok(Self::Probestack),
|
||||
"UdivI64" => Ok(Self::UdivI64),
|
||||
"SdivI64" => Ok(Self::SdivI64),
|
||||
"UremI64" => Ok(Self::UremI64),
|
||||
"SremI64" => Ok(Self::SremI64),
|
||||
"IshlI64" => Ok(Self::IshlI64),
|
||||
"UshrI64" => Ok(Self::UshrI64),
|
||||
"SshrI64" => Ok(Self::SshrI64),
|
||||
"CeilF32" => Ok(Self::CeilF32),
|
||||
"CeilF64" => Ok(Self::CeilF64),
|
||||
"FloorF32" => Ok(Self::FloorF32),
|
||||
|
@ -109,16 +88,6 @@ impl LibCall {
|
|||
/// Returns `None` if no well-known library routine name exists for that instruction.
|
||||
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
|
||||
Some(match ctrl_type {
|
||||
types::I64 => match opcode {
|
||||
Opcode::Udiv => Self::UdivI64,
|
||||
Opcode::Sdiv => Self::SdivI64,
|
||||
Opcode::Urem => Self::UremI64,
|
||||
Opcode::Srem => Self::SremI64,
|
||||
Opcode::Ishl => Self::IshlI64,
|
||||
Opcode::Ushr => Self::UshrI64,
|
||||
Opcode::Sshr => Self::SshrI64,
|
||||
_ => return None,
|
||||
},
|
||||
types::F32 => match opcode {
|
||||
Opcode::Ceil => Self::CeilF32,
|
||||
Opcode::Floor => Self::FloorF32,
|
||||
|
|
|
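// Standalone sketch (simplified enums, not the crate's `Opcode`/`Type`) of the
// opcode-plus-controlling-type lookup performed by `LibCall::for_inst` above:
// only a few (opcode, type) pairs have a well-known runtime routine; anything
// else yields `None` and must be lowered some other way.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Op { Udiv, Ceil, Iadd }

#[derive(Clone, Copy, PartialEq, Debug)]
enum Ty { I64, F32 }

#[derive(Debug, PartialEq)]
enum LibCallName { UdivI64, CeilF32 }

fn for_inst(opcode: Op, ctrl_type: Ty) -> Option<LibCallName> {
    Some(match (ctrl_type, opcode) {
        (Ty::I64, Op::Udiv) => LibCallName::UdivI64,
        (Ty::F32, Op::Ceil) => LibCallName::CeilF32,
        _ => return None,
    })
}

fn main() {
    assert_eq!(for_inst(Op::Udiv, Ty::I64), Some(LibCallName::UdivI64));
    assert_eq!(for_inst(Op::Ceil, Ty::F32), Some(LibCallName::CeilF32));
    assert_eq!(for_inst(Op::Iadd, Ty::I64), None);
}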
@ -27,6 +27,9 @@ pub enum TrapCode {
|
|||
/// A `table_addr` instruction detected an out-of-bounds error.
|
||||
TableOutOfBounds,
|
||||
|
||||
/// Other bounds checking error.
|
||||
OutOfBounds,
|
||||
|
||||
/// Indirect call to a null table entry.
|
||||
IndirectCallToNull,
|
||||
|
||||
|
@ -60,6 +63,7 @@ impl Display for TrapCode {
|
|||
StackOverflow => "stk_ovf",
|
||||
HeapOutOfBounds => "heap_oob",
|
||||
TableOutOfBounds => "table_oob",
|
||||
OutOfBounds => "oob",
|
||||
IndirectCallToNull => "icall_null",
|
||||
BadSignature => "bad_sig",
|
||||
IntegerOverflow => "int_ovf",
|
||||
|
@ -82,6 +86,7 @@ impl FromStr for TrapCode {
|
|||
"stk_ovf" => Ok(StackOverflow),
|
||||
"heap_oob" => Ok(HeapOutOfBounds),
|
||||
"table_oob" => Ok(TableOutOfBounds),
|
||||
"oob" => Ok(OutOfBounds),
|
||||
"icall_null" => Ok(IndirectCallToNull),
|
||||
"bad_sig" => Ok(BadSignature),
|
||||
"int_ovf" => Ok(IntegerOverflow),
|
||||
|
@ -101,10 +106,11 @@ mod tests {
|
|||
use alloc::string::ToString;
|
||||
|
||||
// Everything but user-defined codes.
|
||||
const CODES: [TrapCode; 10] = [
|
||||
const CODES: [TrapCode; 11] = [
|
||||
TrapCode::StackOverflow,
|
||||
TrapCode::HeapOutOfBounds,
|
||||
TrapCode::TableOutOfBounds,
|
||||
TrapCode::OutOfBounds,
|
||||
TrapCode::IndirectCallToNull,
|
||||
TrapCode::BadSignature,
|
||||
TrapCode::IntegerOverflow,
|
||||
|
|
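// A minimal sketch (own toy enum, not the crate's `TrapCode`) of the
// Display/FromStr round-trip that the test above relies on: every
// non-user-defined code must print to a mnemonic that parses back to itself,
// which is why adding `OutOfBounds`/"oob" also bumps the CODES array length.
use core::fmt;
use core::str::FromStr;

#[derive(Clone, Copy, Debug, PartialEq)]
enum Code { HeapOutOfBounds, OutOfBounds }

impl fmt::Display for Code {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Code::HeapOutOfBounds => "heap_oob",
            Code::OutOfBounds => "oob",
        })
    }
}

impl FromStr for Code {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, ()> {
        match s {
            "heap_oob" => Ok(Code::HeapOutOfBounds),
            "oob" => Ok(Code::OutOfBounds),
            _ => Err(()),
        }
    }
}

fn main() {
    for &c in &[Code::HeapOutOfBounds, Code::OutOfBounds] {
        assert_eq!(c.to_string().parse::<Code>(), Ok(c));
    }
}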
Diff between files not shown because of its large size.
|
@ -3,14 +3,14 @@
|
|||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
use crate::machinst::MachLabel;
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, Writable};
|
||||
|
||||
use core::convert::Into;
|
||||
use core::convert::{Into, TryFrom};
|
||||
use std::string::String;
|
||||
|
||||
/// A shift operator for a register or immediate.
|
||||
|
@ -112,9 +112,7 @@ pub enum MemLabel {
|
|||
/// A memory argument to load/store, encapsulating the possible addressing modes.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum MemArg {
|
||||
//
|
||||
// Real ARM64 addressing modes:
|
||||
//
|
||||
Label(MemLabel),
|
||||
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
|
||||
PostIndexed(Writable<Reg>, SImm9),
|
||||
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
|
||||
|
@ -139,35 +137,11 @@ pub enum MemArg {
|
|||
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
|
||||
UnsignedOffset(Reg, UImm12Scaled),
|
||||
|
||||
//
|
||||
// virtual addressing modes that are lowered at emission time:
|
||||
//
|
||||
/// Reference to a "label": e.g., a symbol.
|
||||
Label(MemLabel),
|
||||
/// Offset from the stack pointer. Lowered into a real amode at emission.
|
||||
SPOffset(i64),
|
||||
|
||||
/// Arbitrary offset from a register. Converted to generation of large
|
||||
/// offsets with multiple instructions as necessary during code emission.
|
||||
RegOffset(Reg, i64, Type),
|
||||
|
||||
/// Offset from the stack pointer.
|
||||
SPOffset(i64, Type),
|
||||
|
||||
/// Offset from the frame pointer.
|
||||
FPOffset(i64, Type),
|
||||
|
||||
/// Offset from the "nominal stack pointer", which is where the real SP is
|
||||
/// just after stack and spill slots are allocated in the function prologue.
|
||||
/// At emission time, this is converted to `SPOffset` with a fixup added to
|
||||
/// the offset constant. The fixup is a running value that is tracked as
|
||||
/// emission iterates through instructions in linear order, and can be
|
||||
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
|
||||
///
|
||||
/// The standard ABI is in charge of handling this (by emitting the
|
||||
/// adjustment meta-instructions). It maintains the invariant that "nominal
|
||||
/// SP" is where the actual SP is after the function prologue and before
|
||||
/// clobber pushes. See the diagram in the documentation for
|
||||
/// [crate::isa::aarch64::abi](the ABI module) for more details.
|
||||
NominalSPOffset(i64, Type),
|
||||
/// Offset from the frame pointer. Lowered into a real amode at emission.
|
||||
FPOffset(i64),
|
||||
}
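// Not the actual lowering code: a small sketch of the "nominal SP"
// bookkeeping described in the comment above. The emitter keeps a running
// `virtual_sp_offset`; a NominalSPOffset-style amode becomes a real
// SP-relative offset by adding that running value at emission time.
struct EmitState {
    virtual_sp_offset: i64,
}

fn real_sp_offset(nominal_off: i64, state: &EmitState) -> i64 {
    nominal_off + state.virtual_sp_offset
}

fn main() {
    let mut state = EmitState { virtual_sp_offset: 0 };
    // Suppose the stack pointer was moved down 16 bytes for outgoing
    // arguments and this was recorded with a VirtualSPOffsetAdj-style
    // adjustment of +16 (the usual convention).
    state.virtual_sp_offset += 16;
    // A slot at nominal-SP+8 is now at real-SP+24.
    assert_eq!(real_sp_offset(8, &state), 24);
}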
|
||||
|
||||
impl MemArg {
|
||||
|
@ -178,6 +152,17 @@ impl MemArg {
|
|||
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
|
||||
}
|
||||
|
||||
/// Memory reference using an address in a register and an offset, if possible.
|
||||
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
|
||||
Some(MemArg::Unscaled(reg, simm9))
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
|
||||
Some(MemArg::UnsignedOffset(reg, uimm12s))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
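// Standalone sketch of the offset-fitting rule used by `reg_maybe_offset`
// above (the ranges are the architectural ones; the SImm9/UImm12Scaled types
// themselves are not reproduced here): an unscaled signed 9-bit offset covers
// -256..=255, and the scaled unsigned 12-bit form covers 0..=4095 multiples
// of the access size. Anything else needs extra instructions.
#[derive(Debug, PartialEq)]
enum Fit {
    Unscaled(i64),
    UnsignedScaled(i64), // stored as the scaled index
    NeedsExtraInsts,
}

fn classify(offset: i64, access_bytes: i64) -> Fit {
    if (-256..=255).contains(&offset) {
        Fit::Unscaled(offset)
    } else if offset >= 0 && offset % access_bytes == 0 && offset / access_bytes <= 4095 {
        Fit::UnsignedScaled(offset / access_bytes)
    } else {
        Fit::NeedsExtraInsts
    }
}

fn main() {
    assert_eq!(classify(8, 8), Fit::Unscaled(8));                 // e.g. ldur x1, [x7, #8]
    assert_eq!(classify(1024, 8), Fit::UnsignedScaled(128));      // e.g. ldr x1, [x7, #1024]
    assert_eq!(classify(1_048_576, 8), Fit::NeedsExtraInsts);     // 2^20: too large
}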
|
||||
|
||||
/// Memory reference using the sum of two registers as an address.
|
||||
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
|
||||
MemArg::RegReg(reg1, reg2)
|
||||
|
@ -296,44 +281,78 @@ impl CondBrKind {
|
|||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a Label, as passed into
|
||||
/// An unresolved reference to a BlockIndex, as passed into
|
||||
/// `lower_branch_group()`.
|
||||
Label(MachLabel),
|
||||
/// A fixed PC offset.
|
||||
ResolvedOffset(i32),
|
||||
Block(BlockIndex),
|
||||
/// A resolved reference to another instruction, after
|
||||
/// `Inst::with_block_offsets()`.
|
||||
ResolvedOffset(isize),
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Return the target's label, if it is a label-based target.
|
||||
pub fn as_label(self) -> Option<MachLabel> {
|
||||
/// Lower the branch target given offsets of each block.
|
||||
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
|
||||
match self {
|
||||
BranchTarget::Label(l) => Some(l),
|
||||
&mut BranchTarget::Block(bix) => {
|
||||
let bix = usize::try_from(bix).unwrap();
|
||||
assert!(bix < targets.len());
|
||||
let block_offset_in_func = targets[bix];
|
||||
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
|
||||
*self = BranchTarget::ResolvedOffset(branch_offset);
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(..) => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the block index.
|
||||
pub fn as_block_index(&self) -> Option<BlockIndex> {
|
||||
match self {
|
||||
&BranchTarget::Block(bix) => Some(bix),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset19_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
/// Get the offset as 4-byte words. Returns `0` if not
|
||||
/// yet resolved (in that case, we're only computing
|
||||
/// size and the offset doesn't matter).
|
||||
pub fn as_offset_words(&self) -> isize {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x3ffff);
|
||||
assert!(off >= -0x40000);
|
||||
(off as u32) & 0x7ffff
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset26_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x1ffffff);
|
||||
assert!(off >= -0x2000000);
|
||||
(off as u32) & 0x3ffffff
|
||||
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
|
||||
pub fn as_off26(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 25)) && (off >= -(1 << 25)) {
|
||||
Some((off as u32) & ((1 << 26) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a 19-bit offset, or `None` if overflow.
|
||||
pub fn as_off19(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 18)) && (off >= -(1 << 18)) {
|
||||
Some((off as u32) & ((1 << 19) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Map the block index given a transform map.
|
||||
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
|
||||
match self {
|
||||
&mut BranchTarget::Block(ref mut bix) => {
|
||||
let n = block_index_map[usize::try_from(*bix).unwrap()];
|
||||
*bix = n;
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(_) => {}
|
||||
}
|
||||
}
|
||||
}
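// A standalone sketch of the range checks in `as_off19`/`as_off26` above.
// Branch offsets are encoded in 4-byte instruction words, so a 19-bit field
// reaches roughly +/- 1 MiB and a 26-bit field roughly +/- 128 MiB.
fn encode_branch_offset(byte_offset: isize, field_bits: u32) -> Option<u32> {
    let words = byte_offset >> 2;
    let limit: isize = 1 << (field_bits - 1);
    if words >= -limit && words < limit {
        Some((words as u32) & ((1u32 << field_bits) - 1))
    } else {
        None
    }
}

fn main() {
    // A conditional branch (19-bit field) can reach 64 bytes ahead...
    assert_eq!(encode_branch_offset(64, 19), Some(16));
    // ...but not 2 MiB ahead; that needs the 26-bit unconditional form.
    assert_eq!(encode_branch_offset(2 << 20, 19), None);
    assert_eq!(encode_branch_offset(2 << 20, 26), Some((2 << 20) >> 2));
}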
|
||||
|
||||
|
@ -424,11 +443,8 @@ impl ShowWithRRU for MemArg {
|
|||
simm9.show_rru(mb_rru)
|
||||
),
|
||||
// Eliminated by `mem_finalize()`.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..)
|
||||
| &MemArg::RegOffset(..) => {
|
||||
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Unexpected stack-offset mem-arg mode!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -469,21 +485,18 @@ impl ShowWithRRU for Cond {
|
|||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
|
||||
&BranchTarget::Block(block) => format!("block{}", block),
|
||||
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
|
||||
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
|
||||
/// variants of vector instructions.
|
||||
/// TODO: Create a separate type for SIMD & floating-point operands.
|
||||
/// 64-bit variants of many instructions (and integer registers).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum InstSize {
|
||||
Size32,
|
||||
Size64,
|
||||
Size128,
|
||||
}
|
||||
|
||||
impl InstSize {
|
||||
|
@ -506,13 +519,11 @@ impl InstSize {
|
|||
/// Convert from a needed width to the smallest size that fits.
|
||||
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
|
||||
let bits: usize = bits.into();
|
||||
assert!(bits <= 128);
|
||||
assert!(bits <= 64);
|
||||
if bits <= 32 {
|
||||
InstSize::Size32
|
||||
} else if bits <= 64 {
|
||||
InstSize::Size64
|
||||
} else {
|
||||
InstSize::Size128
|
||||
InstSize::Size64
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -521,12 +532,11 @@ impl InstSize {
|
|||
Self::from_bits(ty_bits(ty))
|
||||
}
|
||||
|
||||
/// Convert to I32, I64, or I128.
|
||||
/// Convert to I32 or I64.
|
||||
pub fn to_ty(self) -> Type {
|
||||
match self {
|
||||
InstSize::Size32 => I32,
|
||||
InstSize::Size64 => I64,
|
||||
InstSize::Size128 => I128,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -534,9 +544,6 @@ impl InstSize {
|
|||
match self {
|
||||
InstSize::Size32 => 0,
|
||||
InstSize::Size64 => 1,
|
||||
_ => {
|
||||
panic!("Unexpected size");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
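// Small standalone sketch (not the crate's `InstSize`) of the width rounding
// and the 0/1 size-bit selection shown above: anything up to 32 bits uses the
// 32-bit register forms (size bit 0), wider values use the 64-bit forms
// (size bit 1).
fn operand_size_bits(needed_bits: usize) -> u32 {
    assert!(needed_bits <= 64);
    if needed_bits <= 32 { 32 } else { 64 }
}

fn size_bit(size_bits: u32) -> u32 {
    match size_bits {
        32 => 0,
        64 => 1,
        _ => panic!("unexpected size"),
    }
}

fn main() {
    assert_eq!(operand_size_bits(1), 32); // a 1-bit value still uses a 32-bit form
    assert_eq!(size_bit(operand_size_bits(64)), 1);
}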
|
||||
|
|
|
@ -4,13 +4,12 @@ use crate::binemit::{CodeOffset, Reloc};
|
|||
use crate::ir::constant::ConstantData;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::TrapCode;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
use crate::isa::aarch64::{inst::regs::PINNED_REG, inst::*};
|
||||
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use log::debug;
|
||||
|
||||
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
|
||||
/// offset, possibly emitting relocation(s) as necessary.
|
||||
|
@ -24,67 +23,43 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
|
|||
/// generic arbitrary stack offset) into real addressing modes, possibly by
|
||||
/// emitting some helper instructions that come immediately before the use
|
||||
/// of this amode.
|
||||
pub fn mem_finalize(
|
||||
insn_off: CodeOffset,
|
||||
mem: &MemArg,
|
||||
state: &EmitState,
|
||||
) -> (SmallVec<[Inst; 4]>, MemArg) {
|
||||
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
|
||||
match mem {
|
||||
&MemArg::RegOffset(_, off, ty)
|
||||
| &MemArg::SPOffset(off, ty)
|
||||
| &MemArg::FPOffset(off, ty)
|
||||
| &MemArg::NominalSPOffset(off, ty) => {
|
||||
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => {
|
||||
let basereg = match mem {
|
||||
&MemArg::RegOffset(reg, _, _) => reg,
|
||||
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
|
||||
&MemArg::SPOffset(..) => stack_reg(),
|
||||
&MemArg::FPOffset(..) => fp_reg(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let adj = match mem {
|
||||
&MemArg::NominalSPOffset(..) => {
|
||||
debug!(
|
||||
"mem_finalize: nominal SP offset {} + adj {} -> {}",
|
||||
off,
|
||||
state.virtual_sp_offset,
|
||||
off + state.virtual_sp_offset
|
||||
);
|
||||
state.virtual_sp_offset
|
||||
}
|
||||
_ => 0,
|
||||
};
|
||||
let off = off + adj;
|
||||
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(off) {
|
||||
let mem = MemArg::Unscaled(basereg, simm9);
|
||||
(smallvec![], mem)
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
|
||||
let mem = MemArg::UnsignedOffset(basereg, uimm12s);
|
||||
(smallvec![], mem)
|
||||
(vec![], mem)
|
||||
} else {
|
||||
// In an addition, x31 is the zero register, not sp; we have only one temporary
|
||||
// so we can't do the proper add here.
|
||||
debug_assert_ne!(
|
||||
basereg,
|
||||
stack_reg(),
|
||||
"should have diverted SP before mem_finalize"
|
||||
);
|
||||
|
||||
let tmp = writable_spilltmp_reg();
|
||||
let mut const_insts = Inst::load_constant(tmp, off as u64);
|
||||
// N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
|
||||
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
|
||||
// is a valid base (for SPOffset) which we must handle here.
|
||||
// Also, SP needs to be the first arg, not second.
|
||||
let add_inst = Inst::AluRRRExtend {
|
||||
let add_inst = Inst::AluRRR {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: tmp,
|
||||
rn: basereg,
|
||||
rm: tmp.to_reg(),
|
||||
extendop: ExtendOp::UXTX,
|
||||
rn: tmp.to_reg(),
|
||||
rm: basereg,
|
||||
};
|
||||
const_insts.push(add_inst);
|
||||
(const_insts, MemArg::reg(tmp.to_reg()))
|
||||
(const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
|
||||
}
|
||||
}
|
||||
|
||||
&MemArg::Label(ref label) => {
|
||||
let off = memlabel_finalize(insn_off, label);
|
||||
(smallvec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
(vec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
}
|
||||
|
||||
_ => (smallvec![], mem.clone()),
|
||||
_ => (vec![], mem.clone()),
|
||||
}
|
||||
}
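// A simplified, self-contained sketch of the decision made by `mem_finalize`
// above (registers, types, and instruction construction omitted): a pseudo
// stack-offset amode either collapses to a single real addressing mode, or
// requires the offset to be materialized into a temporary and added to the
// base register first.
#[derive(Debug, PartialEq)]
enum Lowered {
    Unscaled { off: i64 },          // fits the signed 9-bit form
    UnsignedScaled { scaled: i64 }, // fits the scaled unsigned 12-bit form
    ViaTemp { constant: u64 },      // load constant into tmp, then add to base
}

fn finalize_stack_amode(off: i64, virtual_sp_adj: i64, access_bytes: i64) -> Lowered {
    let off = off + virtual_sp_adj; // nominal-SP fixup, if any
    if (-256..=255).contains(&off) {
        Lowered::Unscaled { off }
    } else if off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095 {
        Lowered::UnsignedScaled { scaled: off / access_bytes }
    } else {
        Lowered::ViaTemp { constant: off as u64 }
    }
}

fn main() {
    assert_eq!(finalize_stack_amode(8, 0, 8), Lowered::Unscaled { off: 8 });
    assert_eq!(finalize_stack_amode(1000, 24, 8), Lowered::UnsignedScaled { scaled: 128 });
    // 32768 / 8 = 4096 exceeds the 12-bit field, so a movz + add is needed,
    // as in the FPOffset(32768) test case later in this commit.
    assert_eq!(finalize_stack_amode(32768, 0, 8), Lowered::ViaTemp { constant: 32768 });
}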
|
||||
|
||||
|
@ -98,12 +73,12 @@ pub fn u64_constant(bits: u64) -> ConstantData {
|
|||
// Instructions and subcomponents: emission
|
||||
|
||||
fn machreg_to_gpr(m: Reg) -> u32 {
|
||||
assert_eq!(m.get_class(), RegClass::I64);
|
||||
assert!(m.get_class() == RegClass::I64);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
fn machreg_to_vec(m: Reg) -> u32 {
|
||||
assert_eq!(m.get_class(), RegClass::V128);
|
||||
assert!(m.get_class() == RegClass::V128);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
|
@ -162,14 +137,6 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
|
|||
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
|
||||
}
|
||||
|
||||
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
|
||||
}
|
||||
}
|
||||
|
||||
const MOVE_WIDE_FIXED: u32 = 0x92800000;
|
||||
|
||||
#[repr(u32)]
|
||||
|
@ -308,8 +275,8 @@ fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) ->
|
|||
}
|
||||
|
||||
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert!(!is_16b); // to be supported later.
|
||||
0b00001110_101_00000_00011_1_00000_00000
|
||||
| ((is_16b as u32) << 30)
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
| (machreg_to_vec(rn) << 16)
|
||||
| (machreg_to_vec(rn) << 5)
|
||||
|
@ -355,29 +322,8 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
|||
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||
let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
|
||||
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
virtual_sp_offset: i64,
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = EmitState;
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
|
||||
// N.B.: we *must* not exceed the "worst-case size" used to compute
|
||||
// where to insert islands, except when islands are explicitly triggered
|
||||
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
|
||||
// to allow disabling the check for `JTSequence`, which is always
|
||||
// emitted following an `EmitIsland`.
|
||||
let mut start_off = sink.cur_offset();
|
||||
|
||||
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
||||
fn emit(&self, sink: &mut O, flags: &settings::Flags) {
|
||||
match self {
|
||||
&Inst::AluRRR { alu_op, rd, rn, rm } => {
|
||||
let top11 = match alu_op {
|
||||
|
@ -650,10 +596,10 @@ impl MachInstEmit for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
|
||||
// ldst encoding helpers take Reg, not Writable<Reg>.
|
||||
|
@ -662,17 +608,17 @@ impl MachInstEmit for Inst {
|
|||
// This is the base opcode (top 10 bits) for the "unscaled
|
||||
// immediate" form (Unscaled). Other addressing modes will OR in
|
||||
// other values for bits 24/25 (bits 1/2 of this constant).
|
||||
let (op, bits) = match self {
|
||||
&Inst::ULoad8 { .. } => (0b0011100001, 8),
|
||||
&Inst::SLoad8 { .. } => (0b0011100010, 8),
|
||||
&Inst::ULoad16 { .. } => (0b0111100001, 16),
|
||||
&Inst::SLoad16 { .. } => (0b0111100010, 16),
|
||||
&Inst::ULoad32 { .. } => (0b1011100001, 32),
|
||||
&Inst::SLoad32 { .. } => (0b1011100010, 32),
|
||||
&Inst::ULoad64 { .. } => (0b1111100001, 64),
|
||||
&Inst::FpuLoad32 { .. } => (0b1011110001, 32),
|
||||
&Inst::FpuLoad64 { .. } => (0b1111110001, 64),
|
||||
&Inst::FpuLoad128 { .. } => (0b0011110011, 128),
|
||||
let op = match self {
|
||||
&Inst::ULoad8 { .. } => 0b0011100001,
|
||||
&Inst::SLoad8 { .. } => 0b0011100010,
|
||||
&Inst::ULoad16 { .. } => 0b0111100001,
|
||||
&Inst::SLoad16 { .. } => 0b0111100010,
|
||||
&Inst::ULoad32 { .. } => 0b1011100001,
|
||||
&Inst::SLoad32 { .. } => 0b1011100010,
|
||||
&Inst::ULoad64 { .. } => 0b1111100001,
|
||||
&Inst::FpuLoad32 { .. } => 0b1011110001,
|
||||
&Inst::FpuLoad64 { .. } => 0b1111110001,
|
||||
&Inst::FpuLoad128 { .. } => 0b0011110011,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -686,9 +632,6 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -697,7 +640,19 @@ impl MachInstEmit for Inst {
|
|||
));
|
||||
}
|
||||
&MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => {
|
||||
assert_eq!(bits, ty_bits(ty));
|
||||
match (ty, self) {
|
||||
(I8, &Inst::ULoad8 { .. }) => {}
|
||||
(I8, &Inst::SLoad8 { .. }) => {}
|
||||
(I16, &Inst::ULoad16 { .. }) => {}
|
||||
(I16, &Inst::SLoad16 { .. }) => {}
|
||||
(I32, &Inst::ULoad32 { .. }) => {}
|
||||
(I32, &Inst::SLoad32 { .. }) => {}
|
||||
(I64, &Inst::ULoad64 { .. }) => {}
|
||||
(F32, &Inst::FpuLoad32 { .. }) => {}
|
||||
(F64, &Inst::FpuLoad64 { .. }) => {}
|
||||
(I128, &Inst::FpuLoad128 { .. }) => {}
|
||||
_ => panic!("Mismatching reg-scaling type in MemArg"),
|
||||
}
|
||||
let extendop = match &mem {
|
||||
&MemArg::RegScaled(..) => None,
|
||||
&MemArg::RegScaledExtended(_, _, _, op) => Some(op),
|
||||
|
@ -742,10 +697,9 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -785,20 +739,20 @@ impl MachInstEmit for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
|
||||
let (op, bits) = match self {
|
||||
&Inst::Store8 { .. } => (0b0011100000, 8),
|
||||
&Inst::Store16 { .. } => (0b0111100000, 16),
|
||||
&Inst::Store32 { .. } => (0b1011100000, 32),
|
||||
&Inst::Store64 { .. } => (0b1111100000, 64),
|
||||
&Inst::FpuStore32 { .. } => (0b1011110000, 32),
|
||||
&Inst::FpuStore64 { .. } => (0b1111110000, 64),
|
||||
&Inst::FpuStore128 { .. } => (0b0011110010, 128),
|
||||
let op = match self {
|
||||
&Inst::Store8 { .. } => 0b0011100000,
|
||||
&Inst::Store16 { .. } => 0b0111100000,
|
||||
&Inst::Store32 { .. } => 0b1011100000,
|
||||
&Inst::Store64 { .. } => 0b1111100000,
|
||||
&Inst::FpuStore32 { .. } => 0b1011110000,
|
||||
&Inst::FpuStore64 { .. } => 0b1111110000,
|
||||
&Inst::FpuStore128 { .. } => 0b0011110010,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -812,9 +766,6 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -843,10 +794,9 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -933,9 +883,6 @@ impl MachInstEmit for Inst {
|
|||
&Inst::FpuMove64 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
|
||||
}
|
||||
&Inst::FpuMove128 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
|
||||
}
|
||||
&Inst::FpuRR { fpu_op, rd, rn } => {
|
||||
let top22 = match fpu_op {
|
||||
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
|
||||
|
@ -966,44 +913,6 @@ impl MachInstEmit for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||
}
|
||||
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
|
||||
FPUOpRI::UShr32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::UShr64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
},
|
||||
&Inst::FpuRRRR {
|
||||
fpu_op,
|
||||
rd,
|
||||
|
@ -1017,15 +926,6 @@ impl MachInstEmit for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let bits_12_16 = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(I8X16, ty);
|
||||
0b00101
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::FpuCmp32 { rn, rm } => {
|
||||
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
|
||||
}
|
||||
|
@ -1080,11 +980,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(8),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put4(const_data.to_bits());
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
|
@ -1093,29 +993,13 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put8(const_data.to_bits());
|
||||
}
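// Not part of the emitter: a tiny arithmetic check of the inline-literal
// pattern used by LoadFpuConst32/64 above. The emitted sequence is
//   [pc+0] load rd, [pc+8]
//   [pc+4] b <past the data>
//   [pc+8] the literal bytes
// so the branch needs to cover its own 4 bytes plus the data, which is
// consistent with the f32 case jumping 8 and the f64 case jumping 12.
fn literal_branch_offset(data_bytes: u32) -> u32 {
    4 + data_bytes
}

fn main() {
    assert_eq!(literal_branch_offset(4), 8);   // LoadFpuConst32
    assert_eq!(literal_branch_offset(8), 12);  // LoadFpuConst64 / LoadConst64
    assert_eq!(literal_branch_offset(16), 20); // LoadFpuConst128
}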
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad128 {
|
||||
rd,
|
||||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(20),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
for i in const_data.to_le_bytes().iter() {
|
||||
sink.put1(*i);
|
||||
}
|
||||
}
|
||||
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
|
||||
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
|
||||
}
|
||||
|
@ -1149,40 +1033,12 @@ impl MachInstEmit for Inst {
|
|||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
} => {
|
||||
let enc_size_for_cmp = match ty {
|
||||
I8X16 => 0b00,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
&Inst::VecRRR { rd, rn, rm, alu_op } => {
|
||||
let (top11, bit15_10) = match alu_op {
|
||||
VecALUOp::SQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::SQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::UQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::UQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
|
||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011),
|
||||
VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011),
|
||||
VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011),
|
||||
VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011),
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
@ -1228,7 +1084,7 @@ impl MachInstEmit for Inst {
|
|||
if top22 != 0 {
|
||||
sink.put4(enc_extend(top22, rd, rn));
|
||||
} else {
|
||||
Inst::mov32(rd, rn).emit(sink, flags, state);
|
||||
Inst::mov32(rd, rn).emit(sink, flags);
|
||||
}
|
||||
}
|
||||
&Inst::Extend {
|
||||
|
@ -1251,7 +1107,7 @@ impl MachInstEmit for Inst {
|
|||
rn: zero_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
sub_inst.emit(sink, flags, state);
|
||||
sub_inst.emit(sink, flags);
|
||||
}
|
||||
&Inst::Extend {
|
||||
rd,
|
||||
|
@ -1271,14 +1127,10 @@ impl MachInstEmit for Inst {
|
|||
panic!("Unsupported extend variant");
|
||||
}
|
||||
&Inst::Jump { ref dest } => {
|
||||
let off = sink.cur_offset();
|
||||
// Indicate that the jump uses a label, if so, so that a fixup can occur later.
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(off, off + 4, l);
|
||||
}
|
||||
// Emit the jump itself.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
|
||||
// TODO: differentiate between as_off26() returning `None` for
|
||||
// out-of-range vs. not-yet-finalized. The latter happens when we
|
||||
// do early (fake) emission for size computation.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
|
||||
}
|
||||
&Inst::Ret => {
|
||||
sink.put4(0xd65f03c0);
|
||||
|
@ -1286,47 +1138,71 @@ impl MachInstEmit for Inst {
|
|||
&Inst::EpiloguePlaceholder => {
|
||||
// Noop; this is just a placeholder for epilogues.
|
||||
}
|
||||
&Inst::Call { ref info } => {
|
||||
sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
|
||||
&Inst::Call {
|
||||
ref dest,
|
||||
loc,
|
||||
opcode,
|
||||
..
|
||||
} => {
|
||||
sink.add_reloc(loc, Reloc::Arm64Call, dest, 0);
|
||||
sink.put4(enc_jump26(0b100101, 0));
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CallInd { ref info } => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
&Inst::CallInd {
|
||||
rn, loc, opcode, ..
|
||||
} => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CondBr {
|
||||
&Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"),
|
||||
&Inst::CondBrLowered { target, kind } => match kind {
|
||||
// TODO: handle >2^19 case by emitting a compound sequence with
|
||||
// an unconditional (26-bit) branch. We need branch-relaxation
|
||||
// adjustment machinery to enable this (because we don't want to
|
||||
// always emit the long form).
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
target.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
},
|
||||
&Inst::CondBrLoweredCompound {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
} => {
|
||||
// Conditional part first.
|
||||
let cond_off = sink.cur_offset();
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
|
||||
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
|
||||
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
taken.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
}
|
||||
sink.put4(enc_conditional_br(taken, kind));
|
||||
|
||||
// Unconditional part next.
|
||||
let uncond_off = sink.cur_offset();
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
|
||||
}
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
|
||||
}
|
||||
&Inst::OneWayCondBr { target, kind } => {
|
||||
let off = sink.cur_offset();
|
||||
if let Some(l) = target.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch19);
|
||||
}
|
||||
sink.put4(enc_conditional_br(target, kind));
|
||||
// Unconditional part.
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0)));
|
||||
}
|
||||
&Inst::IndirectBr { rn, .. } => {
|
||||
sink.put4(enc_br(rn));
|
||||
|
@ -1343,7 +1219,8 @@ impl MachInstEmit for Inst {
|
|||
sink.add_trap(srcloc, code);
|
||||
sink.put4(0xd4a00000);
|
||||
}
|
||||
&Inst::Adr { rd, off } => {
|
||||
&Inst::Adr { rd, ref label } => {
|
||||
let off = memlabel_finalize(sink.cur_offset_from_start(), label);
|
||||
assert!(off > -(1 << 20));
|
||||
assert!(off < (1 << 20));
|
||||
sink.put4(enc_adr(off, rd));
|
||||
|
@ -1358,20 +1235,26 @@ impl MachInstEmit for Inst {
|
|||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
ref info,
|
||||
ref targets,
|
||||
..
|
||||
} => {
|
||||
// This sequence is *one* instruction in the vcode, and is expanded only here at
|
||||
// emission time, because we cannot allow the regalloc to insert spills/reloads in
|
||||
// the middle; we depend on hardcoded PC-rel addressing below.
|
||||
//
|
||||
// N.B.: if PC-rel addressing on ADR below is changed, also update
|
||||
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
|
||||
|
||||
// Save index in a tmp (the live range of ridx only goes to start of this
|
||||
// sequence; rtmp1 or rtmp2 may overwrite it).
|
||||
let inst = Inst::gen_move(rtmp2, ridx, I64);
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Load address of jump table
|
||||
let inst = Inst::Adr { rd: rtmp1, off: 16 };
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Adr {
|
||||
rd: rtmp1,
|
||||
label: MemLabel::PCRel(16),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
// Load value out of jump table
|
||||
let inst = Inst::SLoad32 {
|
||||
rd: rtmp2,
|
||||
|
@ -1383,7 +1266,7 @@ impl MachInstEmit for Inst {
|
|||
),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Add base of jump table to jump-table-sourced block offset
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op: ALUOp::Add64,
|
||||
|
@ -1391,30 +1274,22 @@ impl MachInstEmit for Inst {
|
|||
rn: rtmp1.to_reg(),
|
||||
rm: rtmp2.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Branch to computed address. (`targets` here is only used for successor queries
|
||||
// and is not needed for emission.)
|
||||
let inst = Inst::IndirectBr {
|
||||
rn: rtmp1.to_reg(),
|
||||
targets: vec![],
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Emit jump table (table of 32-bit offsets).
|
||||
let jt_off = sink.cur_offset();
|
||||
for &target in info.targets.iter() {
|
||||
let word_off = sink.cur_offset();
|
||||
let off_into_table = word_off - jt_off;
|
||||
sink.use_label_at_offset(
|
||||
word_off,
|
||||
target.as_label().unwrap(),
|
||||
LabelUse::PCRel32,
|
||||
);
|
||||
sink.put4(off_into_table);
|
||||
for target in targets {
|
||||
let off = target.as_offset_words() * 4;
|
||||
let off = i32::try_from(off).unwrap();
|
||||
// cast i32 to u32 (two's-complement)
|
||||
let off = off as u32;
|
||||
sink.put4(off);
|
||||
}
|
||||
|
||||
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
|
||||
// disable the worst-case-size check in this case.
|
||||
start_off = sink.cur_offset();
|
||||
}
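// A rough model (plain integers, no labels or relocations) of the dispatch
// that the JTSequence above implements: load a 32-bit entry out of the table,
// add it to the base register holding the table address, and branch there
// indirectly. Exactly what the stored entries are relative to is handled by
// the label/relocation machinery and is glossed over here.
fn jump_table_target(table_base: u64, table: &[i32], index: usize) -> u64 {
    // Out-of-range indices are assumed to have been bounds-checked and
    // branched away before this sequence is reached.
    let entry = i64::from(table[index]);
    table_base.wrapping_add(entry as u64)
}

fn main() {
    // Table at address 0x1000 with three target offsets relative to the base.
    let targets = [0x40, 0x80, 0x100];
    assert_eq!(jump_table_target(0x1000, &targets, 1), 0x1080);
}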
|
||||
&Inst::LoadConst64 { rd, const_data } => {
|
||||
let inst = Inst::ULoad64 {
|
||||
|
@ -1422,11 +1297,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put8(const_data);
|
||||
}
|
||||
&Inst::LoadExtName {
|
||||
|
@ -1440,11 +1315,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
|
||||
if flags.emit_all_ones_funcaddrs() {
|
||||
sink.put8(u64::max_value());
|
||||
|
@ -1452,82 +1327,53 @@ impl MachInstEmit for Inst {
|
|||
sink.put8(0);
|
||||
}
|
||||
}
|
||||
&Inst::LoadAddr { rd, ref mem } => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let (reg, offset) = match mem {
|
||||
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
|
||||
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
|
||||
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
|
||||
};
|
||||
let abs_offset = if offset < 0 {
|
||||
-offset as u64
|
||||
} else {
|
||||
offset as u64
|
||||
};
|
||||
let alu_op = if offset < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
|
||||
if offset == 0 {
|
||||
let mov = Inst::mov(rd, reg);
|
||||
mov.emit(sink, flags, state);
|
||||
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
|
||||
let add = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
imm12,
|
||||
&Inst::LoadAddr { rd, ref mem } => match *mem {
|
||||
MemArg::FPOffset(fp_off) => {
|
||||
let alu_op = if fp_off < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
} else {
|
||||
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
|
||||
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
|
||||
// that no other instructions will be inserted here (we're emitting directly),
|
||||
// and a live range of `tmp2` should not span this instruction, so this use
|
||||
// should otherwise be correct.
|
||||
debug_assert!(rd.to_reg() != tmp2_reg());
|
||||
debug_assert!(reg != tmp2_reg());
|
||||
let tmp = writable_tmp2_reg();
|
||||
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
|
||||
insn.emit(sink, flags, state);
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
|
||||
{
|
||||
let inst = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
imm12,
|
||||
rn: fp_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
} else {
|
||||
let const_insts =
|
||||
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
|
||||
for inst in const_insts {
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: fp_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
let add = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
rm: tmp.to_reg(),
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
}
|
||||
_ => unimplemented!("{:?}", mem),
|
||||
},
|
||||
&Inst::GetPinnedReg { rd } => {
|
||||
let inst = Inst::Mov {
|
||||
rd,
|
||||
rm: xreg(PINNED_REG),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
&Inst::VirtualSPOffsetAdj { offset } => {
|
||||
debug!(
|
||||
"virtual sp offset adjusted by {} -> {}",
|
||||
offset,
|
||||
state.virtual_sp_offset + offset
|
||||
);
|
||||
state.virtual_sp_offset += offset;
|
||||
}
|
||||
&Inst::EmitIsland { needed_space } => {
|
||||
if sink.island_needed(needed_space + 4) {
|
||||
let jump_around_label = sink.get_label();
|
||||
let jmp = Inst::Jump {
|
||||
dest: BranchTarget::Label(jump_around_label),
|
||||
};
|
||||
jmp.emit(sink, flags, state);
|
||||
sink.emit_island();
|
||||
sink.bind_label(jump_around_label);
|
||||
}
|
||||
&Inst::SetPinnedReg { rm } => {
|
||||
let inst = Inst::Mov {
|
||||
rd: Writable::from_reg(xreg(PINNED_REG)),
|
||||
rm,
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
}
|
||||
|
||||
let end_off = sink.cur_offset();
|
||||
debug_assert!((end_off - start_off) <= Inst::worst_case_size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ use crate::isa::aarch64::inst::*;
|
|||
use crate::isa::test_utils;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
#[test]
|
||||
|
@ -1311,68 +1310,38 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(32768, I8),
|
||||
mem: MemArg::FPOffset(32768),
|
||||
srcloc: None,
|
||||
},
|
||||
"100090D2B063308B010240F9",
|
||||
"movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"0F0090D2EF011D8BE10140F9",
|
||||
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(-32768, I8),
|
||||
mem: MemArg::FPOffset(-32768),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0FF8F92B063308B010240F9",
|
||||
"movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"EFFF8F92EF011D8BE10140F9",
|
||||
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576, I8), // 2^20
|
||||
mem: MemArg::FPOffset(1048576), // 2^20
|
||||
srcloc: None,
|
||||
},
|
||||
"1002A0D2B063308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"0F02A0D2EF011D8BE10140F9",
|
||||
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1
|
||||
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
|
||||
srcloc: None,
|
||||
},
|
||||
"300080D21002A0F2B063308B010240F9",
|
||||
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 8, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E18040F8",
|
||||
"ldur x1, [x7, #8]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1024, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E10042F9",
|
||||
"ldr x1, [x7, #1024]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1048576, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"1002A0D2F060308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
|
||||
"2F0080D20F02A0F2EF011D8BE10140F9",
|
||||
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
|
@ -1832,7 +1801,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
|
@ -1843,7 +1811,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
|
@ -1854,7 +1821,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
|
@ -1865,83 +1831,10 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmeq,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E38E386E",
|
||||
"cmeq v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmgt,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E336384E",
|
||||
"cmgt v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmge,
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I8X16,
|
||||
},
|
||||
"373D2C4E",
|
||||
"cmge v23.16b, v9.16b, v12.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhi,
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2534216E",
|
||||
"cmhi v5.16b, v1.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhs,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I8X16,
|
||||
},
|
||||
"483C2F6E",
|
||||
"cmhs v8.16b, v2.16b, v15.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::Extend {
|
||||
rd: writable_xreg(1),
|
||||
|
@ -2062,7 +1955,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Zero(xreg(8)),
|
||||
},
|
||||
|
@ -2070,7 +1963,7 @@ fn test_aarch64_binemit() {
|
|||
"cbz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::NotZero(xreg(8)),
|
||||
},
|
||||
|
@ -2078,7 +1971,7 @@ fn test_aarch64_binemit() {
|
|||
"cbnz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Eq),
|
||||
},
|
||||
|
@ -2086,7 +1979,7 @@ fn test_aarch64_binemit() {
|
|||
"b.eq 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ne),
|
||||
},
|
||||
|
@ -2095,7 +1988,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hs),
|
||||
},
|
||||
|
@ -2103,7 +1996,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lo),
|
||||
},
|
||||
|
@ -2111,7 +2004,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lo 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Mi),
|
||||
},
|
||||
|
@ -2119,7 +2012,7 @@ fn test_aarch64_binemit() {
|
|||
"b.mi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Pl),
|
||||
},
|
||||
|
@ -2127,7 +2020,7 @@ fn test_aarch64_binemit() {
|
|||
"b.pl 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vs),
|
||||
},
|
||||
|
@ -2135,7 +2028,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
},
|
||||
|
@ -2143,7 +2036,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vc 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hi),
|
||||
},
|
||||
|
@ -2151,7 +2044,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ls),
|
||||
},
|
||||
|
@ -2159,7 +2052,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ls 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ge),
|
||||
},
|
||||
|
@ -2167,7 +2060,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ge 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lt),
|
||||
},
|
||||
|
@ -2175,7 +2068,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Gt),
|
||||
},
|
||||
|
@ -2183,7 +2076,7 @@ fn test_aarch64_binemit() {
|
|||
"b.gt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
},
|
||||
|
@ -2191,7 +2084,7 @@ fn test_aarch64_binemit() {
|
|||
"b.le 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Al),
|
||||
},
|
||||
|
@ -2199,7 +2092,7 @@ fn test_aarch64_binemit() {
|
|||
"b.al 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Nv),
|
||||
},
|
||||
|
@ -2208,7 +2101,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::CondBr {
|
||||
Inst::CondBrLoweredCompound {
|
||||
taken: BranchTarget::ResolvedOffset(64),
|
||||
not_taken: BranchTarget::ResolvedOffset(128),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
|
@ -2219,13 +2112,11 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::Call {
|
||||
info: Box::new(CallInfo {
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
}),
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
},
|
||||
"00000094",
|
||||
"bl 0",
|
||||
|
@ -2233,13 +2124,11 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::CallInd {
|
||||
info: Box::new(CallIndInfo {
|
||||
rn: xreg(10),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
}),
|
||||
rn: xreg(10),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
},
|
||||
"40013FD6",
|
||||
"blr x10",
|
||||
|
@ -2248,7 +2137,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::IndirectBr {
|
||||
rn: xreg(3),
|
||||
targets: vec![],
|
||||
targets: vec![1, 2, 3],
|
||||
},
|
||||
"60001FD6",
|
||||
"br x3",
|
||||
|
@ -2259,7 +2148,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::Adr {
|
||||
rd: writable_xreg(15),
|
||||
off: (1 << 20) - 4,
|
||||
label: MemLabel::PCRel((1 << 20) - 4),
|
||||
},
|
||||
"EFFF7F10",
|
||||
"adr x15, pc+1048572",
|
||||
|
@ -2274,15 +2163,6 @@ fn test_aarch64_binemit() {
|
|||
"mov v8.8b, v4.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuMove128 {
|
||||
rd: writable_vreg(17),
|
||||
rn: vreg(26),
|
||||
},
|
||||
"511FBA4E",
|
||||
"mov v17.16b, v26.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRR {
|
||||
fpu_op: FPUOp1::Abs32,
|
||||
|
@ -2519,46 +2399,6 @@ fn test_aarch64_binemit() {
|
|||
"fmadd d15, d30, d31, d1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204202F",
|
||||
"ushr v2.2s, v5.2s, #32",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204417F",
|
||||
"ushr d2, d5, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44553F2F",
|
||||
"sli v4.2s, v10.2s, #31",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44557F7F",
|
||||
"sli d4, d10, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuToInt {
|
||||
op: FpuToIntOp::F32ToU32,
|
||||
|
@ -2845,15 +2685,6 @@ fn test_aarch64_binemit() {
|
|||
"ldr d16, pc+8 ; b 12 ; data.f64 1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst128 {
|
||||
rd: writable_vreg(5),
|
||||
const_data: 0x0f0e0d0c0b0a09080706050403020100,
|
||||
},
|
||||
"4500009C05000014000102030405060708090A0B0C0D0E0F",
|
||||
"ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuCSel32 {
|
||||
rd: writable_vreg(1),
|
||||
|
@ -2960,11 +2791,19 @@ fn test_aarch64_binemit() {
|
|||
let actual_printing = insn.show_rru(Some(&rru));
|
||||
assert_eq!(expected_printing, actual_printing);
|
||||
|
||||
// Check the encoding is as expected.
|
||||
let text_size = {
|
||||
let mut code_sec = MachSectionSize::new(0);
|
||||
insn.emit(&mut code_sec, &flags);
|
||||
code_sec.size()
|
||||
};
|
||||
|
||||
let mut sink = test_utils::TestCodeSink::new();
|
||||
let mut buffer = MachBuffer::new();
|
||||
insn.emit(&mut buffer, &flags, &mut Default::default());
|
||||
let buffer = buffer.finish();
|
||||
buffer.emit(&mut sink);
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, text_size);
|
||||
let code_sec = sections.get_section(code_idx);
|
||||
insn.emit(code_sec, &flags);
|
||||
sections.emit(&mut sink);
|
||||
let actual_encoding = &sink.stringify();
|
||||
assert_eq!(expected_encoding, actual_encoding);
|
||||
}
|
||||
|
|
|
@ -106,85 +106,6 @@ impl SImm7Scaled {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct FPULeftShiftImm {
|
||||
pub amount: u8,
|
||||
pub lane_size_in_bits: u8,
|
||||
}
|
||||
|
||||
impl FPULeftShiftImm {
|
||||
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||
if amount < lane_size_in_bits {
|
||||
Some(Self {
|
||||
amount,
|
||||
lane_size_in_bits,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enc(&self) -> u32 {
|
||||
debug_assert!(self.lane_size_in_bits.is_power_of_two());
|
||||
debug_assert!(self.lane_size_in_bits > self.amount);
|
||||
// The encoding of the immediate follows the table below,
|
||||
// where xs encode the shift amount.
|
||||
//
|
||||
// | lane_size_in_bits | encoding |
|
||||
// +------------------------------+
|
||||
// | 8 | 0001xxx |
|
||||
// | 16 | 001xxxx |
|
||||
// | 32 | 01xxxxx |
|
||||
// | 64 | 1xxxxxx |
|
||||
//
|
||||
// The highest one bit is represented by `lane_size_in_bits`. Since
|
||||
// `lane_size_in_bits` is a power of 2 and `amount` is less
|
||||
// than `lane_size_in_bits`, they can be ORed
|
||||
// together to produce the encoded value.
|
||||
u32::from(self.lane_size_in_bits | self.amount)
|
||||
}
|
||||
}
|
||||
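A quick standalone check of the OR-based encoding described in the comments above (a sketch under the same invariants; enc_left_shift is a hypothetical free function, not part of this patch):

fn enc_left_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    // Same invariants as FPULeftShiftImm: lane size is 32 or 64 and the
    // amount is strictly smaller than the lane size.
    assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
    assert!(amount < lane_size_in_bits);
    // The lane-size marker bit and the shift amount occupy disjoint bits,
    // so ORing them yields the encoded immediate.
    u32::from(lane_size_in_bits | amount)
}

fn main() {
    // 32-bit lane, shift by 31: 0b010_0000 | 0b001_1111 = 0b011_1111 = 63.
    assert_eq!(enc_left_shift(32, 31), 63);
    // 64-bit lane, shift by 1: 0b100_0000 | 0b000_0001 = 65.
    assert_eq!(enc_left_shift(64, 1), 65);
}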
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct FPURightShiftImm {
|
||||
pub amount: u8,
|
||||
pub lane_size_in_bits: u8,
|
||||
}
|
||||
|
||||
impl FPURightShiftImm {
|
||||
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||
if amount > 0 && amount <= lane_size_in_bits {
|
||||
Some(Self {
|
||||
amount,
|
||||
lane_size_in_bits,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enc(&self) -> u32 {
|
||||
debug_assert_ne!(0, self.amount);
|
||||
// The encoding of the immediate follows the table below,
|
||||
// where xs encode the negated shift amount.
|
||||
//
|
||||
// | lane_size_in_bits | encoding |
|
||||
// +------------------------------+
|
||||
// | 8 | 0001xxx |
|
||||
// | 16 | 001xxxx |
|
||||
// | 32 | 01xxxxx |
|
||||
// | 64 | 1xxxxxx |
|
||||
//
|
||||
// The shift amount is negated such that a shift amount
|
||||
// of 1 (in 64-bit) is encoded as 0b111111 and a shift
|
||||
// amount of 64 is encoded as 0b000000,
|
||||
// in the bottom 6 bits.
|
||||
u32::from((self.lane_size_in_bits * 2) - self.amount)
|
||||
}
|
||||
}
|
||||
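The same kind of check for the negated encoding used by the right-shift immediate above (a standalone sketch under the stated invariants; enc_right_shift is hypothetical):

fn enc_right_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    // Same invariants as FPURightShiftImm: lane size is 32 or 64 and
    // 0 < amount <= lane size.
    assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
    assert!(amount > 0 && amount <= lane_size_in_bits);
    // Subtracting from twice the lane size keeps the lane-size marker bit
    // set while storing the negated amount in the low bits.
    u32::from(lane_size_in_bits * 2 - amount)
}

fn main() {
    // 64-bit lane, shift by 1: 128 - 1 = 127, i.e. the marker bit plus 0b111111.
    assert_eq!(enc_right_shift(64, 1), 127);
    // 64-bit lane, shift by 64: 128 - 64 = 64, i.e. the marker bit plus 0b000000.
    assert_eq!(enc_right_shift(64, 64), 64);
}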
|
||||
/// A 9-bit signed offset.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct SImm9 {
|
||||
|
@ -213,11 +134,6 @@ impl SImm9 {
|
|||
pub fn bits(&self) -> u32 {
|
||||
(self.value as u32) & 0x1ff
|
||||
}
|
||||
|
||||
/// Signed value of immediate.
|
||||
pub fn value(&self) -> i32 {
|
||||
self.value as i32
|
||||
}
|
||||
}
|
||||
|
||||
/// An unsigned, scaled 12-bit offset.
|
||||
|
@ -256,16 +172,6 @@ impl UImm12Scaled {
|
|||
pub fn bits(&self) -> u32 {
|
||||
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
|
||||
}
|
||||
|
||||
/// Value after scaling.
|
||||
pub fn value(&self) -> u32 {
|
||||
self.value as u32
|
||||
}
|
||||
|
||||
/// The value type which is the scaling base.
|
||||
pub fn scale_ty(&self) -> Type {
|
||||
self.scale_ty
|
||||
}
|
||||
}
|
||||
|
||||
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
|
||||
|
@ -660,18 +566,6 @@ impl ShowWithRRU for SImm7Scaled {
|
|||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for FPULeftShiftImm {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.amount)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for FPURightShiftImm {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.amount)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for SImm9 {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.value)
|
||||
|
|
File diff suppressed because it is too large
|
@ -1,6 +1,5 @@
|
|||
//! AArch64 ISA definitions: registers.
|
||||
|
||||
use crate::ir::types::*;
|
||||
use crate::isa::aarch64::inst::InstSize;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
@ -21,21 +20,23 @@ pub const PINNED_REG: u8 = 21;
|
|||
const XREG_INDICES: [u8; 31] = [
|
||||
// X0 - X7
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
// X8 - X15
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
// X8 - X14
|
||||
40, 41, 42, 43, 44, 45, 46,
|
||||
// X15
|
||||
59,
|
||||
// X16, X17
|
||||
58, 59,
|
||||
47, 48,
|
||||
// X18
|
||||
60,
|
||||
// X19, X20
|
||||
48, 49,
|
||||
49, 50,
|
||||
// X21, put aside because it's the pinned register.
|
||||
57,
|
||||
58,
|
||||
// X22 - X28
|
||||
50, 51, 52, 53, 54, 55, 56,
|
||||
// X29 (FP)
|
||||
51, 52, 53, 54, 55, 56, 57,
|
||||
// X29
|
||||
61,
|
||||
// X30 (LR)
|
||||
// X30
|
||||
62,
|
||||
];
|
||||
|
||||
|
@ -124,17 +125,14 @@ pub fn writable_fp_reg() -> Writable<Reg> {
|
|||
Writable::from_reg(fp_reg())
|
||||
}
|
||||
|
||||
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
|
||||
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
|
||||
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
|
||||
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
|
||||
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
|
||||
///
|
||||
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
|
||||
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
|
||||
/// to live through call instructions.
|
||||
/// Get a reference to the "spill temp" register. This register is used to
|
||||
/// compute the address of a spill slot when a direct offset addressing mode from
|
||||
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
|
||||
/// and reserve it for this purpose for simplicity; otherwise we need a
|
||||
/// multi-stage analysis where we first determine how many spill slots we have,
|
||||
/// then perhaps remove the reg from the pool and recompute regalloc.
|
||||
pub fn spilltmp_reg() -> Reg {
|
||||
xreg(16)
|
||||
xreg(15)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the spilltmp reg.
|
||||
|
@ -142,20 +140,6 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
|
|||
Writable::from_reg(spilltmp_reg())
|
||||
}
|
||||
|
||||
/// Get a reference to the second temp register. We need this in some edge cases
|
||||
/// where we need both the spilltmp and another temporary.
|
||||
///
|
||||
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
|
||||
/// free to use otherwise.
|
||||
pub fn tmp2_reg() -> Reg {
|
||||
xreg(17)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the tmp2 reg.
|
||||
pub fn writable_tmp2_reg() -> Writable<Reg> {
|
||||
Writable::from_reg(tmp2_reg())
|
||||
}
|
||||
|
||||
/// Create the register universe for AArch64.
|
||||
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
||||
let mut regs = vec![];
|
||||
|
@ -189,7 +173,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
|
||||
for i in 0u8..32u8 {
|
||||
// See above for excluded registers.
|
||||
if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
|
||||
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
|
||||
continue;
|
||||
}
|
||||
let reg = Reg::new_real(
|
||||
|
@ -207,7 +191,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: x_reg_base as usize,
|
||||
last: x_reg_last as usize,
|
||||
suggested_scratch: Some(XREG_INDICES[19] as usize),
|
||||
suggested_scratch: Some(XREG_INDICES[13] as usize),
|
||||
});
|
||||
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: v_reg_base as usize,
|
||||
|
@ -227,8 +211,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
regs.len()
|
||||
};
|
||||
|
||||
regs.push((xreg(16).to_real_reg(), "x16".to_string()));
|
||||
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
|
||||
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
|
||||
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
|
||||
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
|
||||
regs.push((link_reg().to_real_reg(), "lr".to_string()));
|
||||
|
@ -276,17 +259,13 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
|
|||
s
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
|
||||
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
if reg.get_class() != RegClass::V128 {
|
||||
return s;
|
||||
}
|
||||
let prefix = match size {
|
||||
InstSize::Size32 => "s",
|
||||
InstSize::Size64 => "d",
|
||||
InstSize::Size128 => "q",
|
||||
};
|
||||
let prefix = if size.is32() { "s" } else { "d" };
|
||||
s.replace_range(0..1, prefix);
|
||||
s
|
||||
}
|
||||
|
@ -312,17 +291,3 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
|
|||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
match ty {
|
||||
I8X16 => s.push_str(".16b"),
|
||||
F32X2 => s.push_str(".2s"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
s
|
||||
}
|
||||
|
|
|
@ -14,14 +14,12 @@ use crate::ir::Inst as IRInst;
|
|||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
|
||||
use super::lower_inst;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
//============================================================================
|
||||
|
@ -106,18 +104,11 @@ pub(crate) enum ResultRegImmShift {
|
|||
}
|
||||
|
||||
//============================================================================
|
||||
// Instruction input "slots".
|
||||
// Instruction input and output "slots".
|
||||
//
|
||||
// We use these types to refer to operand numbers, and result numbers, together
|
||||
// with the associated instruction, in a type-safe way.
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnInput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) input: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular output of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnOutput {
|
||||
|
@ -125,48 +116,102 @@ pub(crate) struct InsnOutput {
|
|||
pub(crate) output: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnInput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) input: usize,
|
||||
}
|
||||
|
||||
/// Producer of a value: either a previous instruction's output, or a register that will be
|
||||
/// codegen'd separately.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum InsnInputSource {
|
||||
Output(InsnOutput),
|
||||
Reg(Reg),
|
||||
}
|
||||
|
||||
impl InsnInputSource {
|
||||
fn as_output(self) -> Option<InsnOutput> {
|
||||
match self {
|
||||
InsnInputSource::Output(o) => Some(o),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
|
||||
assert!(num <= ctx.num_inputs(output.insn));
|
||||
InsnInput {
|
||||
insn: output.insn,
|
||||
input: num,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
|
||||
/// register otherwise.
|
||||
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
|
||||
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
|
||||
let out = InsnOutput {
|
||||
insn: input_inst,
|
||||
output: result_num,
|
||||
};
|
||||
InsnInputSource::Output(out)
|
||||
} else {
|
||||
let reg = ctx.input(input.insn, input.input);
|
||||
InsnInputSource::Reg(reg)
|
||||
}
|
||||
}
|
||||
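A minimal usage sketch of input_source (assumed to sit alongside the helpers above; input_is_iconst is a hypothetical helper, not part of this patch): check whether an input is produced by an iconst in the same block, falling back to false when only a register is available.

fn input_is_iconst<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> bool {
    match input_source(ctx, input) {
        // The producing instruction is visible: inspect its opcode.
        InsnInputSource::Output(out) => ctx.data(out.insn).opcode() == Opcode::Iconst,
        // Only a register is available, so nothing can be pattern-matched.
        InsnInputSource::Reg(_) => false,
    }
}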
|
||||
//============================================================================
|
||||
// Lowering: convert instruction inputs to forms that we can use.
|
||||
// Lowering: convert instruction outputs to result types.
|
||||
|
||||
/// Lower an instruction input to a 64-bit constant, if possible.
|
||||
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
|
||||
let input = ctx.get_input(input.insn, input.input);
|
||||
input.constant
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a constant register-shift amount, if possible.
|
||||
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<u128> {
|
||||
/// Lower an instruction output to a 64-bit constant, if possible.
|
||||
pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
|
||||
if out.output > 0 {
|
||||
None
|
||||
} else {
|
||||
let inst_data = ctx.data(out.insn);
|
||||
|
||||
match inst_data {
|
||||
&InstructionData::UnaryConst {
|
||||
opcode: _,
|
||||
constant_handle,
|
||||
} => {
|
||||
let mut bytes = [0u8; 16];
|
||||
let c = ctx.get_constant_data(constant_handle).clone().into_vec();
|
||||
assert_eq!(c.len(), 16);
|
||||
bytes.copy_from_slice(&c);
|
||||
Some(u128::from_le_bytes(bytes))
|
||||
if inst_data.opcode() == Opcode::Null {
|
||||
Some(0)
|
||||
} else {
|
||||
match inst_data {
|
||||
&InstructionData::UnaryImm { opcode: _, imm } => {
|
||||
// Only has Into for i64; we use u64 elsewhere, so we cast.
|
||||
let imm: i64 = imm.into();
|
||||
Some(imm as u64)
|
||||
}
|
||||
&InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
|
||||
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
|
||||
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
|
||||
_ => None,
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f32> {
|
||||
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f64> {
|
||||
output_to_const(ctx, out).map(|value| f64::from_bits(value))
|
||||
}
|
||||
|
||||
/// Lower an instruction output to a constant register-shift amount, if possible.
|
||||
pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
|
||||
/// parameter to `input_to_*` below.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
|
@ -192,9 +237,9 @@ impl NarrowValueMode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Allocate a register for an instruction output and return it.
|
||||
/// Lower an instruction output to a reg.
|
||||
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
|
||||
ctx.get_output(out.insn, out.output)
|
||||
ctx.output(out.insn, out.output)
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a reg.
|
||||
|
@ -207,31 +252,13 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> Reg {
|
||||
debug!("input_to_reg: input {:?}", input);
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let from_bits = ty_bits(ty) as u8;
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
let in_reg = if let Some(c) = inputs.constant {
|
||||
let masked = if from_bits < 64 {
|
||||
c & ((1u64 << from_bits) - 1)
|
||||
} else {
|
||||
c
|
||||
};
|
||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
|
||||
for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
to_reg.to_reg()
|
||||
} else {
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
};
|
||||
|
||||
let in_reg = ctx.input(input.insn, input.input);
|
||||
match (narrow_mode, from_bits) {
|
||||
(NarrowValueMode::None, _) => in_reg,
|
||||
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -242,7 +269,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(NarrowValueMode::SignExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -255,23 +282,18 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
||||
|
||||
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
||||
if inputs.constant.is_some() {
|
||||
// Constants are zero-extended to full 64-bit width on load already.
|
||||
in_reg
|
||||
} else {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
}
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
}
|
||||
(NarrowValueMode::SignExtend64, n) if n < 64 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -282,7 +304,6 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(_, 64) => in_reg,
|
||||
(_, 128) => in_reg,
|
||||
|
||||
_ => panic!(
|
||||
"Unsupported input width: input ty {} bits {} mode {:?}",
|
||||
|
@ -292,6 +313,8 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
||||
/// This does not actually codegen the source instruction; it just uses the
|
||||
/// vreg into which the source instruction will generate its value.
|
||||
///
|
||||
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
||||
/// the high bits clear. For many operations, such as an add/sub/mul or any
|
||||
|
@ -307,18 +330,23 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRS {
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let op = ctx.data(insn).opcode();
|
||||
|
||||
if op == Opcode::Ishl {
|
||||
let shiftee = InsnInput { insn, input: 0 };
|
||||
let shift_amt = InsnInput { insn, input: 1 };
|
||||
let shiftee = get_input(ctx, out, 0);
|
||||
let shift_amt = get_input(ctx, out, 1);
|
||||
|
||||
// Can we get the shift amount as an immediate?
|
||||
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
|
||||
if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
ctx.merged(insn);
|
||||
ctx.merged(shift_amt_out.insn);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -336,10 +364,11 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSE {
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let op = ctx.data(insn).opcode();
|
||||
let out_ty = ctx.output_ty(insn, 0);
|
||||
let out_ty = ctx.output_ty(insn, out.output);
|
||||
let out_bits = ty_bits(out_ty);
|
||||
|
||||
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
|
||||
|
@ -349,7 +378,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
&& ((narrow_mode.is_32bit() && out_bits < 32)
|
||||
|| (!narrow_mode.is_32bit() && out_bits < 64))
|
||||
{
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let reg = output_to_reg(ctx, out);
|
||||
let extendop = match (narrow_mode, out_bits) {
|
||||
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
|
||||
ExtendOp::SXTB
|
||||
|
@ -373,14 +402,15 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
return ResultRSE::RegExtend(reg.to_reg(), extendop);
|
||||
}
|
||||
|
||||
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
|
||||
if op == Opcode::Uextend || op == Opcode::Sextend {
|
||||
assert!(out_bits == 32 || out_bits == 64);
|
||||
let sign_extend = op == Opcode::Sextend;
|
||||
let inner_ty = ctx.input_ty(insn, 0);
|
||||
let extendee = get_input(ctx, out, 0);
|
||||
let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
|
||||
let inner_bits = ty_bits(inner_ty);
|
||||
assert!(inner_bits < out_bits);
|
||||
let extendop = match (sign_extend, inner_bits) {
|
||||
|
@ -394,7 +424,8 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(false, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
|
||||
ctx.merged(insn);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
}
|
||||
|
@ -407,9 +438,12 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSEImm12 {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,11 +455,14 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSImmLogic {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
let ty = ctx.output_ty(out.insn, out.output);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -436,9 +473,12 @@ pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
|
|||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> ResultRegImmShift {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -544,10 +584,12 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
|
||||
// mul instructions (Load/StoreComplex don't include scale factors).
|
||||
|
||||
// Handle one reg and offset.
|
||||
// Handle one reg and offset that fits in immediate, if possible.
|
||||
if addends.len() == 1 {
|
||||
let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
|
||||
return MemArg::RegOffset(reg, offset as i64, elem_ty);
|
||||
if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
|
||||
return memarg;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle two regs and a zero offset, if possible.
|
||||
|
@ -558,7 +600,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
// Otherwise, generate add instructions.
|
||||
let addr = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let addr = ctx.tmp(RegClass::I64, I64);
|
||||
|
||||
// Get the const into a reg.
|
||||
lower_constant_u64(ctx, addr.clone(), offset as u64);
|
||||
|
@ -570,7 +612,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// In an addition, the stack register is the zero register, so divert it to another
|
||||
// register just before doing the actual add.
|
||||
let reg = if reg == stack_reg() {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::Mov {
|
||||
rd: tmp,
|
||||
rm: stack_reg(),
|
||||
|
@ -617,14 +659,6 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::load_fp_constant64(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
rd: Writable<Reg>,
|
||||
value: u128,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant128(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
|
||||
match cc {
|
||||
IntCC::Equal => Cond::Eq,
|
||||
|
@ -716,7 +750,6 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
B64 | I64 | F64 => 64,
|
||||
B128 | I128 => 128,
|
||||
IFLAGS | FFLAGS => 32,
|
||||
I8X16 | B8X16 => 128,
|
||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||
}
|
||||
}
|
||||
|
@ -724,7 +757,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||
match ty {
|
||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
||||
F32 | F64 | B128 | I128 | I8X16 => false,
|
||||
F32 | F64 | B128 | I128 => false,
|
||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||
}
|
||||
|
@ -790,29 +823,24 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input.
|
||||
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
|
||||
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
|
||||
c: &mut C,
|
||||
input: InsnInput,
|
||||
op: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
debug!(
|
||||
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
|
||||
input, inputs, op
|
||||
);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
debug!(" -> input inst {:?}", data);
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
c.merged(out.insn);
|
||||
return Some(out.insn);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
||||
/// Bint or a bitcast).
|
||||
/// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
|
||||
///
|
||||
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
|
||||
/// a bit more generic.
|
||||
|
@ -822,19 +850,21 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
|
|||
op: Opcode,
|
||||
conv: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
if let Some(ret) = maybe_input_insn(c, input, op) {
|
||||
return Some(ret);
|
||||
}
|
||||
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
if data.opcode() == conv {
|
||||
let inputs = c.get_input(src_inst, 0);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
let conv_insn = out.insn;
|
||||
let conv_input = InsnInput {
|
||||
insn: conv_insn,
|
||||
input: 0,
|
||||
};
|
||||
if let Some(inner) = maybe_input_insn(c, conv_input, op) {
|
||||
c.merged(conv_insn);
|
||||
return Some(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -846,7 +876,6 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
insn: IRInst,
|
||||
is_signed: bool,
|
||||
) {
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let bits = ty_bits(ty);
|
||||
let narrow_mode = match (bits <= 32, is_signed) {
|
||||
|
@ -868,7 +897,6 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
let ty = ctx.input_ty(insn, 0);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rd = writable_zero_reg();
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
|
@ -906,21 +934,17 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
|
|||
impl LowerBackend for AArch64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst)
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst);
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
targets: &[BlockIndex],
|
||||
fallthrough: Option<BlockIndex>,
|
||||
) {
|
||||
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
|
||||
}
|
||||
|
||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||
Some(xreg(PINNED_REG))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
//! Lower a single Cranelift instruction into vcode.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::condcodes::FloatCC;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
||||
use crate::isa::aarch64::abi::*;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
|
||||
use regalloc::RegClass;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::SmallVec;
|
||||
|
@ -22,10 +19,7 @@ use smallvec::SmallVec;
|
|||
use super::lower::*;
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
insn: IRInst,
|
||||
) -> CodegenResult<()> {
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
||||
let op = ctx.data(insn).opcode();
|
||||
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
|
||||
.map(|i| InsnInput { insn, input: i })
|
||||
|
@ -41,17 +35,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
|
||||
let value = ctx.get_constant(insn).unwrap();
|
||||
let value = output_to_const(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_u64(ctx, rd, value);
|
||||
}
|
||||
Opcode::F32const => {
|
||||
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
|
||||
let value = output_to_const_f32(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f32(ctx, rd, value);
|
||||
}
|
||||
Opcode::F64const => {
|
||||
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
|
||||
let value = output_to_const_f64(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f64(ctx, rd, value);
|
||||
}
|
||||
|
@ -85,8 +79,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
VecALUOp::UQAddScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -97,7 +91,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -117,8 +110,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
VecALUOp::UQSubScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -129,7 +122,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -279,7 +271,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -305,7 +297,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 20;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
@ -332,7 +324,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
nzcv: NZCV::new(false, false, false, false),
|
||||
cond: Cond::Eq,
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(12),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
});
|
||||
|
@ -345,7 +337,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -501,7 +493,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
|
||||
let tmp = ctx.tmp(RegClass::I64, ty);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp,
|
||||
|
@ -524,7 +516,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// Really ty_bits_size - rn, but the upper bits of the result are
|
||||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp,
|
||||
|
@ -537,7 +529,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
};
|
||||
|
||||
// Explicitly mask the rotation count.
|
||||
let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op: ALUOp::And32,
|
||||
rd: tmp_masked_rm,
|
||||
|
@ -546,8 +538,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
let tmp_masked_rm = tmp_masked_rm.to_reg();
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp1,
|
||||
|
@ -586,7 +578,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
immshift.imm &= ty_bits_size - 1;
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmShift {
|
||||
alu_op: ALUOp::Lsr32,
|
||||
rd: tmp1,
|
||||
|
@ -691,7 +683,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// and fix the sequence below to work properly for this.
|
||||
let narrow_mode = NarrowValueMode::ZeroExtend64;
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
|
||||
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
|
||||
// the rest of the code is identical to the 64-bit version.
|
||||
|
@ -878,7 +870,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
||||
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
||||
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
||||
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in load"),
|
||||
});
|
||||
}
|
||||
|
@ -918,7 +909,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(32, true) => Inst::FpuStore32 { rd, mem, srcloc },
|
||||
(64, false) => Inst::Store64 { rd, mem, srcloc },
|
||||
(64, true) => Inst::FpuStore64 { rd, mem, srcloc },
|
||||
(128, _) => Inst::FpuStore128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in store"),
|
||||
});
|
||||
}
|
||||
|
@ -1002,7 +992,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::Bitselect => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
|
@ -1155,66 +1145,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(false, true) => NarrowValueMode::SignExtend64,
|
||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||
};
|
||||
|
||||
if ty_bits(ty) < 128 {
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
} else {
|
||||
if ty != I8X16 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"unsupported simd type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
|
||||
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
// 'Not equal' is implemented with 'equal' and inverting
|
||||
// the result.
|
||||
let (alu_op, swap) = match cond {
|
||||
Cond::Eq => (VecALUOp::Cmeq, false),
|
||||
Cond::Ne => (VecALUOp::Cmeq, false),
|
||||
Cond::Ge => (VecALUOp::Cmge, false),
|
||||
Cond::Gt => (VecALUOp::Cmgt, false),
|
||||
Cond::Le => (VecALUOp::Cmge, true),
|
||||
Cond::Lt => (VecALUOp::Cmgt, true),
|
||||
Cond::Hs => (VecALUOp::Cmhs, false),
|
||||
Cond::Hi => (VecALUOp::Cmhi, false),
|
||||
Cond::Ls => (VecALUOp::Cmhs, true),
|
||||
Cond::Lo => (VecALUOp::Cmhi, true),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if swap {
|
||||
std::mem::swap(&mut rn, &mut rm);
|
||||
}
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
ty: I8X16,
|
||||
});
|
||||
}
|
||||
}
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
}
|
||||
|
||||
Opcode::Fcmp => {
|
||||
|
@ -1252,15 +1188,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Trapif | Opcode::Trapff => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
|
||||
let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
// The flags must not have been clobbered by any other
|
||||
// instruction between the iadd_ifcout and this instruction, as
|
||||
// verified by the CLIF validator; so we can simply use the
|
||||
// flags here.
|
||||
cond
|
||||
} else if op == Opcode::Trapif {
|
||||
let cond = if op == Opcode::Trapif {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
|
@ -1283,7 +1211,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// Branch around the break instruction with inverted cond. Go straight to lowered
|
||||
// one-target form; this is logically part of a single-in single-out template lowering.
|
||||
let cond = cond.invert();
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
|
@ -1305,12 +1233,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::FuncAddr => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _) = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let extname = ctx.call_target(insn).unwrap().clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: Box::new(extname),
|
||||
name: extname,
|
||||
srcloc: loc,
|
||||
offset: 0,
|
||||
});
|
||||
|
@ -1322,12 +1249,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::SymbolValue => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let (extname, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: Box::new(extname),
|
||||
name: extname,
|
||||
srcloc: loc,
|
||||
offset,
|
||||
});
|
||||
|
@ -1335,50 +1262,54 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::Call | Opcode::CallIndirect => {
|
||||
let loc = ctx.srcloc(insn);
|
||||
let (mut abi, inputs) = match op {
|
||||
let (abi, inputs) = match op {
|
||||
Opcode::Call => {
|
||||
let (extname, dist) = ctx.call_target(insn).unwrap();
|
||||
let extname = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(
|
||||
AArch64ABICall::from_func(sig, &extname, dist, loc)?,
|
||||
&inputs[..],
|
||||
)
|
||||
(AArch64ABICall::from_func(sig, &extname, loc), &inputs[..])
|
||||
}
|
||||
Opcode::CallIndirect => {
|
||||
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() - 1 == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
abi.emit_stack_pre_adjust(ctx);
|
||||
for inst in abi.gen_stack_pre_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
assert!(inputs.len() == abi.num_args());
|
||||
for (i, input) in inputs.iter().enumerate() {
|
||||
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
|
||||
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
|
||||
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
for inst in abi.gen_call().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
abi.emit_call(ctx);
|
||||
for (i, output) in outputs.iter().enumerate() {
|
||||
let retval_reg = output_to_reg(ctx, *output);
|
||||
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
|
||||
ctx.emit(abi.gen_copy_retval_to_reg(i, retval_reg));
|
||||
}
|
||||
for inst in abi.gen_stack_post_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
abi.emit_stack_post_adjust(ctx);
|
||||
}
|
||||
|
||||
Opcode::GetPinnedReg => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
|
||||
ctx.emit(Inst::GetPinnedReg { rd });
|
||||
}
|
||||
|
||||
Opcode::SetPinnedReg => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
|
||||
ctx.emit(Inst::SetPinnedReg { rm });
|
||||
}
|
||||
|
||||
Opcode::Spill
|
||||
|
@ -1409,20 +1340,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
panic!("Branch opcode reached non-branch lowering logic!");
|
||||
}
|
||||
|
||||
Opcode::Vconst => {
|
||||
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f128(ctx, rd, value);
|
||||
}
|
||||
|
||||
Opcode::RawBitcast => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||
}
|
||||
|
||||
Opcode::Shuffle
|
||||
Opcode::Vconst
|
||||
| Opcode::Shuffle
|
||||
| Opcode::Vsplit
|
||||
| Opcode::Vconcat
|
||||
| Opcode::Vselect
|
||||
|
@ -1431,20 +1350,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::Splat
|
||||
| Opcode::Insertlane
|
||||
| Opcode::Extractlane
|
||||
| Opcode::RawBitcast
|
||||
| Opcode::ScalarToVector
|
||||
| Opcode::Swizzle
|
||||
| Opcode::Uload8x8
|
||||
| Opcode::Uload8x8Complex
|
||||
| Opcode::Sload8x8
|
||||
| Opcode::Sload8x8Complex
|
||||
| Opcode::Uload16x4
|
||||
| Opcode::Uload16x4Complex
|
||||
| Opcode::Sload16x4
|
||||
| Opcode::Sload16x4Complex
|
||||
| Opcode::Uload32x2
|
||||
| Opcode::Uload32x2Complex
|
||||
| Opcode::Sload32x2
|
||||
| Opcode::Sload32x2Complex => {
|
||||
| Opcode::Sload32x2 => {
|
||||
// TODO
|
||||
panic!("Vector ops not implemented.");
|
||||
}
|
||||
|
@ -1538,38 +1452,54 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Fcopysign => {
|
||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
||||
//
|
||||
// This is a scalar Fcopysign.
|
||||
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
||||
// (64 bits for example, 32-bit sequence is analogous):
|
||||
//
|
||||
// mov vd, vn
|
||||
// ushr vtmp, vm, #63 / #31
|
||||
// sli vd, vtmp, #63 / #31
|
||||
// MOV Xtmp1, Dinput0
|
||||
// MOV Xtmp2, Dinput1
|
||||
// AND Xtmp2, 0x8000_0000_0000_0000
|
||||
// BIC Xtmp1, 0x8000_0000_0000_0000
|
||||
// ORR Xtmp1, Xtmp1, Xtmp2
|
||||
// MOV Doutput, Xtmp1
|
||||
|
||||
let ty = ctx.output_ty(insn, 0);
|
||||
let bits = ty_bits(ty) as u8;
|
||||
let bits = ty_bits(ty);
|
||||
assert!(bits == 32 || bits == 64);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
|
||||
|
||||
// Copy LHS to rd.
|
||||
ctx.emit(Inst::FpuMove64 { rd, rn });
|
||||
|
||||
// Copy the sign bit to the lowest bit in tmp.
|
||||
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
|
||||
let imml = if bits == 32 {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000, I32).unwrap()
|
||||
} else {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000_0000_0000, I64).unwrap()
|
||||
};
|
||||
let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp2,
|
||||
rn: tmp2.to_reg(),
|
||||
imml: imml.clone(),
|
||||
});
|
||||
|
||||
// Insert the bit from tmp into the sign bit of rd.
|
||||
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
||||
let alu_op = choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
imml,
|
||||
});
|
||||
let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
rm: tmp2.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::MovToVec64 {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
rn: tmp1.to_reg(),
|
||||
});
|
||||
}
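// Aside (not part of this diff): a minimal sketch of what both Fcopysign
// sequences above compute, written on plain 64-bit bit patterns. The name
// `copysign_bits_64` is illustrative only.
fn copysign_bits_64(magnitude: u64, sign_source: u64) -> u64 {
    const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
    // Clear the sign bit of the magnitude, then insert the sign bit of the
    // other operand; the AND/BIC/ORR and ushr/sli sequences do the same.
    (magnitude & !SIGN_BIT) | (sign_source & SIGN_BIT)
}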
@ -1601,14 +1531,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
|
||||
}
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let tmp = ctx.tmp(RegClass::V128, I128);
|
||||
|
||||
// Check that the input is in range, with "truncate towards zero" semantics. This means
|
||||
// we allow values that are slightly out of range:
|
||||
|
@ -1642,7 +1572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1655,7 +1585,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1685,7 +1615,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1698,7 +1628,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1774,8 +1704,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
|
||||
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
|
@ -1860,35 +1790,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
}
|
||||
|
||||
Opcode::IaddIfcout => {
|
||||
// This is a two-output instruction that is needed for the
|
||||
// legalizer's explicit heap-check sequence, among possible other
|
||||
// uses. Its second output is a flags output only ever meant to
|
||||
// check for overflow using the
|
||||
// `backend.unsigned_add_overflow_condition()` condition.
|
||||
//
|
||||
// Note that the CLIF validation will ensure that no flag-setting
|
||||
// operation comes between this IaddIfcout and its use (e.g., a
|
||||
// Trapif). Thus, we can rely on implicit communication through the
|
||||
// processor flags rather than explicitly generating flags into a
|
||||
// register. We simply use the variant of the add instruction that
|
||||
// sets flags (`adds`) here.
|
||||
|
||||
// Ensure that the second output isn't directly called for: it
|
||||
// should only be used by a flags-consuming op, which will directly
|
||||
// understand this instruction and merge the comparison.
|
||||
assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg()));
|
||||
|
||||
// Now handle the iadd as above, except use an AddS opcode that sets
|
||||
// flags.
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
}
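// Aside (not part of this diff): the condition that the flags-setting `adds`
// exposes is plain unsigned-add overflow, i.e. what a checked add reports.
// A hedged sketch; the helper name is illustrative only.
fn unsigned_add_overflows(a: u64, b: u64) -> bool {
    // Carry set on aarch64 after `adds` corresponds to the sum not fitting in 64 bits.
    a.checked_add(b).is_none()
}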
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
|
@ -1899,6 +1800,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
|
@ -1947,8 +1849,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Packss
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
|
@ -1960,16 +1860,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
        Opcode::AvgRound => unimplemented!(),
        Opcode::TlsValue => unimplemented!(),
    }

    Ok(())
}

pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    branches: &[IRInst],
    targets: &[MachLabel],
    fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
    targets: &[BlockIndex],
    fallthrough: Option<BlockIndex>,
) {
    // A block should end with at most two branches. The first may be a
    // conditional branch; a conditional branch can be followed only by an
    // unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
|
@ -1983,14 +1881,18 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
//println!(
|
||||
// "lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
// op0, op1
|
||||
//);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let taken = BranchTarget::Block(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
Opcode::Jump => BranchTarget::Block(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let flag_input = InsnInput {
|
||||
|
@ -2050,8 +1952,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::BrIcmp => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let ty = ctx.input_ty(branches[0], 0);
|
||||
let bits = ty_bits(ty);
|
||||
|
@ -2084,15 +1984,13 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::Brif => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
|
@ -2103,7 +2001,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
} else {
|
||||
// If the ifcmp result is actually placed in a
|
||||
|
@ -2113,7 +2011,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2121,7 +2019,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Brff => {
|
||||
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_fp_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
|
@ -2131,7 +2028,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
} else {
|
||||
// If the ffcmp result is actually placed in a
|
||||
|
@ -2141,7 +2038,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2158,15 +2055,12 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// fills in `targets[0]` with our fallthrough block, so this
|
||||
// is valid for both Jump and Fallthrough.
|
||||
ctx.emit(Inst::Jump {
|
||||
dest: BranchTarget::Label(targets[0]),
|
||||
dest: BranchTarget::Block(targets[0]),
|
||||
});
|
||||
}
|
||||
Opcode::BrTable => {
|
||||
// Expand `br_table index, default, JT` to:
|
||||
//
|
||||
// emit_island // this forces an island at this point
|
||||
// // if the jumptable would push us past
|
||||
// // the deadline
|
||||
// subs idx, #jt_size
|
||||
// b.hs default
|
||||
// adr vTmp1, PC+16
|
||||
|
@ -2176,11 +2070,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// [jumptable offsets relative to JT base]
|
||||
let jt_size = targets.len() - 1;
|
||||
assert!(jt_size <= std::u32::MAX as usize);
|
||||
|
||||
ctx.emit(Inst::EmitIsland {
|
||||
needed_space: 4 * (6 + jt_size) as CodeOffset,
|
||||
});
|
||||
|
||||
let ridx = input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
|
@ -2190,8 +2079,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
NarrowValueMode::ZeroExtend32,
|
||||
);
|
||||
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
|
||||
// Bounds-check and branch to default.
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
||||
|
@ -2210,10 +2099,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
}
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: default_target.clone(),
|
||||
let default_target = BranchTarget::Block(targets[0]);
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
target: default_target.clone(),
|
||||
});
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
|
@ -2234,23 +2123,19 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let jt_targets: Vec<BranchTarget> = targets
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.map(|bix| BranchTarget::Block(*bix))
|
||||
.collect();
|
||||
let targets_for_term: Vec<MachLabel> = targets.to_vec();
|
||||
let targets_for_term: Vec<BlockIndex> = targets.to_vec();
|
||||
ctx.emit(Inst::JTSequence {
|
||||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
info: Box::new(JTSequenceInfo {
|
||||
targets: jt_targets,
|
||||
targets_for_term: targets_for_term,
|
||||
}),
|
||||
targets: jt_targets,
|
||||
targets_for_term,
|
||||
});
|
||||
}
|
||||
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,6 +1,5 @@
|
|||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{
|
||||
|
@ -16,7 +15,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, Triple};
|
|||
|
||||
// New backend:
|
||||
mod abi;
|
||||
pub(crate) mod inst;
|
||||
mod inst;
|
||||
mod lower;
|
||||
mod lower_inst;
|
||||
|
||||
|
@ -26,18 +25,12 @@ use inst::create_reg_universe;
|
|||
pub struct AArch64Backend {
    triple: Triple,
    flags: settings::Flags,
    reg_universe: RealRegUniverse,
}

impl AArch64Backend {
    /// Create a new AArch64 backend with the given (shared) flags.
    pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
        let reg_universe = create_reg_universe(&flags);
        AArch64Backend {
            triple,
            flags,
            reg_universe,
        }
        AArch64Backend { triple, flags }
    }
|
||||
|
||||
/// This performs lowering to VCode, register-allocates the code, computes block layout and
|
||||
|
@ -47,7 +40,7 @@ impl AArch64Backend {
|
|||
func: &Function,
|
||||
flags: settings::Flags,
|
||||
) -> CodegenResult<VCode<inst::Inst>> {
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags));
|
||||
compile::compile::<AArch64Backend>(func, self, abi)
|
||||
}
|
||||
}
|
||||
|
@ -60,7 +53,7 @@ impl MachBackend for AArch64Backend {
|
|||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let sections = vcode.emit();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
|
@ -69,10 +62,8 @@ impl MachBackend for AArch64Backend {
|
|||
None
|
||||
};
|
||||
|
||||
let buffer = buffer.finish();
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
sections,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
|
@ -90,21 +81,8 @@ impl MachBackend for AArch64Backend {
|
|||
&self.flags
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on
|
||||
// underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the
|
||||
// opposite of x86).
|
||||
IntCC::UnsignedLessThan
|
||||
fn reg_universe(&self) -> RealRegUniverse {
|
||||
create_reg_universe(&self.flags)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,8 +134,8 @@ mod test {
|
|||
Triple::from_str("aarch64").unwrap(),
|
||||
settings::Flags::new(shared_flags),
|
||||
);
|
||||
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
|
||||
let code = &buffer.data[..];
|
||||
let sections = backend.compile_function(&mut func, false).unwrap().sections;
|
||||
let code = §ions.sections[0].data;
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
|
@ -171,7 +149,7 @@ mod test {
|
|||
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
assert_eq!(code, &golden);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -214,32 +192,34 @@ mod test {
|
|||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = &result.buffer.data[..];
|
||||
let code = &result.sections.sections[0].data;
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w0, w0, w1
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x28
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w1, w0, w1
|
||||
// mov w1, w1
|
||||
// cbnz x1, 0x18
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x18
|
||||
// mov x1, #0x1234 // #4660
|
||||
// sub w0, w0, w1
|
||||
// mov x1, x0
|
||||
// mov x0, #0x1234
|
||||
// add w1, w1, w0
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// sub w0, w1, w0
|
||||
// mov sp, x29
|
||||
// ldp x29, x30, [sp], #16
|
||||
// ret
|
||||
// add w2, w1, w0
|
||||
// mov w2, w2
|
||||
// cbnz x2, ... <---- compound branch (cond / uncond)
|
||||
// b ... <----
|
||||
|
||||
let golden = vec![
|
||||
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
|
||||
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
|
||||
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
|
||||
193, 168, 192, 3, 95, 214,
|
||||
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
|
||||
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
|
||||
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
|
||||
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
|
||||
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
assert_eq!(code, &golden);
|
||||
}
|
||||
}
|
@ -17,7 +17,6 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{Architecture, Triple};
|
||||
|
||||
|
@ -136,10 +135,6 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -66,7 +66,6 @@ use crate::settings::SetResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
|
||||
|
@ -78,14 +77,11 @@ mod riscv;
|
|||
#[cfg(feature = "x86")]
|
||||
mod x86;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
mod x64;
|
||||
|
||||
#[cfg(feature = "arm32")]
|
||||
mod arm32;
|
||||
|
||||
#[cfg(feature = "arm64")]
|
||||
pub(crate) mod aarch64;
|
||||
mod aarch64;
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
pub mod unwind;
|
||||
|
@ -423,10 +419,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
|||
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait
|
||||
/// with `isa.as_any().downcast_ref::<isa::foo::Isa>()`.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
impl Debug for &dyn TargetIsa {
|
||||
|
|
|
@ -17,7 +17,6 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -131,10 +130,6 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -168,7 +163,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -181,7 +176,7 @@ mod tests {
|
|||
);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -191,7 +186,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV64.
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
@ -219,7 +214,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -229,7 +224,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64, types::I64).is_err());
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -239,7 +234,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV32.
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
|
|
@ -8,6 +8,7 @@ use thiserror::Error;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
type Register = u16;
|
||||
type Expression = Vec<u8>;
|
||||
|
||||
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
|
||||
#[allow(missing_docs)]
|
||||
|
@ -22,8 +23,6 @@ pub enum RegisterMappingError {
|
|||
}
|
||||
|
||||
// This mirrors gimli's CallFrameInstruction, but is serializable
|
||||
// This excludes CfaExpression, Expression, ValExpression due to
|
||||
// https://github.com/gimli-rs/gimli/issues/513.
|
||||
// TODO: if gimli ever adds serialization support, remove this type
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
|
@ -31,12 +30,15 @@ pub(crate) enum CallFrameInstruction {
|
|||
    Cfa(Register, i32),
    CfaRegister(Register),
    CfaOffset(i32),
    CfaExpression(Expression),
    Restore(Register),
    Undefined(Register),
    SameValue(Register),
    Offset(Register, i32),
    ValOffset(Register, i32),
    Register(Register, Register),
    Expression(Register, Expression),
    ValExpression(Register, Expression),
    RememberState,
    RestoreState,
    ArgsSize(u32),
|
||||
|
@ -50,33 +52,34 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
|
||||
CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
|
||||
CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
|
||||
CallFrameInstruction::CfaExpression(expr) => Self::CfaExpression(expr.0),
|
||||
CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
|
||||
CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
|
||||
CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
|
||||
CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
|
||||
CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
|
||||
CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
|
||||
CallFrameInstruction::Expression(reg, expr) => Self::Expression(reg.0, expr.0),
|
||||
CallFrameInstruction::ValExpression(reg, expr) => Self::ValExpression(reg.0, expr.0),
|
||||
CallFrameInstruction::RememberState => Self::RememberState,
|
||||
CallFrameInstruction::RestoreState => Self::RestoreState,
|
||||
CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
|
||||
_ => {
|
||||
// Cranelift's unwind support does not generate `CallFrameInstruction`s with
|
||||
// Expression at this moment, and it is not trivial to
|
||||
// serialize such instructions.
|
||||
panic!("CallFrameInstruction with Expression not supported");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
||||
fn into(self) -> gimli::write::CallFrameInstruction {
|
||||
use gimli::{write::CallFrameInstruction, Register};
|
||||
use gimli::{
|
||||
write::{CallFrameInstruction, Expression},
|
||||
Register,
|
||||
};
|
||||
|
||||
match self {
|
||||
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
|
||||
Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
|
||||
Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
|
||||
Self::CfaExpression(expr) => CallFrameInstruction::CfaExpression(Expression(expr)),
|
||||
Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
|
||||
Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
|
||||
Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
|
||||
|
@ -85,6 +88,12 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
Self::Register(reg1, reg2) => {
|
||||
CallFrameInstruction::Register(Register(reg1), Register(reg2))
|
||||
}
|
||||
Self::Expression(reg, expr) => {
|
||||
CallFrameInstruction::Expression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::ValExpression(reg, expr) => {
|
||||
CallFrameInstruction::ValExpression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::RememberState => CallFrameInstruction::RememberState,
|
||||
Self::RestoreState => CallFrameInstruction::RestoreState,
|
||||
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
|
||||
|
|
|
@ -1,467 +0,0 @@
|
|||
//! Implementation of the standard x64 ABI.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
use args::*;
|
||||
|
||||
#[derive(Clone, Debug)]
enum ABIArg {
    Reg(RealReg),
    _Stack,
}

#[derive(Clone, Debug)]
enum ABIRet {
    Reg(RealReg),
    _Stack,
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
args: Vec<ABIArg>,
|
||||
rets: Vec<ABIRet>,
|
||||
|
||||
/// Offsets to each stack slot.
|
||||
_stack_slots: Vec<usize>,
|
||||
|
||||
/// Total stack size of all the stack slots.
|
||||
stack_slots_size: usize,
|
||||
|
||||
/// Clobbered registers, as indicated by regalloc.
|
||||
clobbered: Set<Writable<RealReg>>,
|
||||
|
||||
/// Total number of spill slots, as indicated by regalloc.
|
||||
num_spill_slots: Option<usize>,
|
||||
|
||||
/// Calculated while creating the prologue, and used when creating the epilogue. Amount by
|
||||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
}
|
||||
|
||||
fn in_int_reg(ty: types::Type) -> bool {
|
||||
match ty {
|
||||
types::I8
|
||||
| types::I16
|
||||
| types::I32
|
||||
| types::I64
|
||||
| types::B1
|
||||
| types::B8
|
||||
| types::B16
|
||||
| types::B32
|
||||
| types::B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
2 => Some(regs::rdx()),
|
||||
3 => Some(regs::rcx()),
|
||||
4 => Some(regs::r8()),
|
||||
5 => Some(regs::r9()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_callee_save_systemv(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||
_ => false,
|
||||
},
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl X64ABIBody {
|
||||
/// Create a new body ABI instance.
|
||||
pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
|
||||
// Compute args and retvals from signature.
|
||||
let mut args = vec![];
|
||||
let mut next_int_arg = 0;
|
||||
for param in &f.signature.params {
|
||||
match param.purpose {
|
||||
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
|
||||
// `VMContext` is `r14` in Baldrdash.
|
||||
args.push(ABIArg::Reg(regs::r14().to_real_reg()));
|
||||
}
|
||||
|
||||
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
|
||||
if in_int_reg(param.value_type) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
|
||||
args.push(ABIArg::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing arg on the stack");
|
||||
}
|
||||
next_int_arg += 1;
|
||||
} else {
|
||||
unimplemented!("non int normal register")
|
||||
}
|
||||
}
|
||||
|
||||
_ => unimplemented!("other parameter purposes"),
|
||||
}
|
||||
}
|
||||
|
||||
let mut rets = vec![];
|
||||
let mut next_int_retval = 0;
|
||||
for ret in &f.signature.returns {
|
||||
match ret.purpose {
|
||||
ir::ArgumentPurpose::Normal => {
|
||||
if in_int_reg(ret.value_type) {
|
||||
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
|
||||
rets.push(ABIRet::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing return on the stack");
|
||||
}
|
||||
next_int_retval += 1;
|
||||
} else {
|
||||
unimplemented!("returning non integer normal value");
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
unimplemented!("non normal argument purpose");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute stackslot locations and total stackslot size.
|
||||
let mut stack_offset: usize = 0;
|
||||
let mut _stack_slots = vec![];
|
||||
for (stackslot, data) in f.stack_slots.iter() {
|
||||
let off = stack_offset;
|
||||
stack_offset += data.size as usize;
|
||||
|
||||
// 8-bit align.
|
||||
stack_offset = (stack_offset + 7) & !7usize;
|
||||
|
||||
debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
|
||||
_stack_slots.push(off);
|
||||
}
|
||||
|
||||
Self {
|
||||
args,
|
||||
rets,
|
||||
_stack_slots,
|
||||
stack_slots_size: stack_offset,
|
||||
clobbered: Set::empty(),
|
||||
num_spill_slots: None,
|
||||
frame_size_bytes: None,
|
||||
call_conv: f.signature.call_conv.clone(),
|
||||
flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ABIBody for X64ABIBody {
|
||||
type I = Inst;
|
||||
|
||||
fn temp_needed(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn init(&mut self, _: Option<Writable<Reg>>) {}
|
||||
|
||||
fn flags(&self) -> &settings::Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn num_args(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_retvals(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_stackslots(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for arg in &self.args {
|
||||
if let &ABIArg::Reg(r) = arg {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn liveouts(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for ret in &self.rets {
|
||||
if let &ABIRet::Reg(r) = ret {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
|
||||
match &self.args[idx] {
|
||||
ABIArg::Reg(from_reg) => {
|
||||
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
|
||||
// TODO do we need a sign extension if it's I32?
|
||||
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
|
||||
}
|
||||
unimplemented!("moving from non-int arg to vreg");
|
||||
}
|
||||
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
idx: usize,
|
||||
from_reg: Writable<Reg>,
|
||||
ext: ArgumentExtension,
|
||||
) -> Vec<Inst> {
|
||||
match ext {
|
||||
ArgumentExtension::None => {}
|
||||
_ => unimplemented!(
|
||||
"unimplemented argument extension {:?} is required for baldrdash",
|
||||
ext
|
||||
),
|
||||
};
|
||||
|
||||
let mut ret = Vec::new();
|
||||
match &self.rets[idx] {
|
||||
ABIRet::Reg(to_reg) => {
|
||||
if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
|
||||
ret.push(Inst::mov_r_r(
|
||||
/*is64=*/ true,
|
||||
from_reg.to_reg(),
|
||||
Writable::<Reg>::from_reg(to_reg.to_reg()),
|
||||
))
|
||||
} else {
|
||||
unimplemented!("moving from vreg to non-int return value");
|
||||
}
|
||||
}
|
||||
|
||||
ABIRet::_Stack => {
|
||||
unimplemented!("moving from vreg to stack return value");
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_ret(&self) -> Inst {
|
||||
Inst::ret()
|
||||
}
|
||||
|
||||
fn gen_epilogue_placeholder(&self) -> Inst {
|
||||
Inst::epilogue_placeholder()
|
||||
}
|
||||
|
||||
fn set_num_spillslots(&mut self, slots: usize) {
|
||||
self.num_spill_slots = Some(slots);
|
||||
}
|
||||
|
||||
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
|
||||
self.clobbered = clobbered;
|
||||
}
|
||||
|
||||
fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn load_stackslot(
|
||||
&self,
|
||||
_slot: StackSlot,
|
||||
_offset: u32,
|
||||
_ty: Type,
|
||||
_into_reg: Writable<Reg>,
|
||||
) -> Inst {
|
||||
unimplemented!("load_stackslot")
|
||||
}
|
||||
|
||||
fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!("load_spillslot")
|
||||
}
|
||||
|
||||
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_spillslot")
|
||||
}
|
||||
|
||||
fn gen_prologue(&mut self) -> Vec<Inst> {
|
||||
let r_rsp = regs::rsp();
|
||||
|
||||
let mut insts = vec![];
|
||||
|
||||
// Baldrdash generates its own prologue sequence, so we don't have to.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// The "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::push64(RMI::reg(r_rbp)));
|
||||
// RSP is now 0 % 16
|
||||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let mut callee_saved_used = 0;
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
RegClass::I64 => {
|
||||
insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
|
||||
callee_saved_used += 8;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
|
||||
if self.call_conv.extends_baldrdash() {
|
||||
// Baldrdash expects the stack to take at least the number of words set in
|
||||
// baldrdash_prologue_words; count them here.
|
||||
debug_assert!(
|
||||
!self.flags.enable_probestack(),
|
||||
"baldrdash does not expect cranelift to emit stack probes"
|
||||
);
|
||||
total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
|
||||
}
|
||||
|
||||
debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
|
||||
let frame_size = total_stacksize + callee_saved_used % 16;
|
||||
|
||||
// Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
|
||||
let frame_size = (frame_size + 15) & !15;
|
||||
if frame_size > 0x7FFF_FFFF {
|
||||
unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
|
||||
}
|
||||
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
// Explicitly allocate the frame.
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
if frame_size > 0 {
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Sub,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Stash this value. We'll need it for the epilogue.
|
||||
debug_assert!(self.frame_size_bytes.is_none());
|
||||
self.frame_size_bytes = Some(frame_size);
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_epilogue(&self) -> Vec<Inst> {
|
||||
let mut insts = vec![];
|
||||
|
||||
// Undo what we did in the prologue.
|
||||
|
||||
// Clear the spill area and the 16-alignment padding below it.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let frame_size = self.frame_size_bytes.unwrap();
|
||||
if frame_size > 0 {
|
||||
let r_rsp = regs::rsp();
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Add,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for w_real_reg in clobbered.into_iter().rev() {
|
||||
match w_real_reg.to_reg().get_class() {
|
||||
RegClass::I64 => {
|
||||
// TODO: make these conversion sequences less cumbersome.
|
||||
insts.push(Inst::pop64(Writable::<Reg>::from_reg(
|
||||
w_real_reg.to_reg().to_reg(),
|
||||
)))
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
// Baldrdash generates its own preamble.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// Undo the "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::pop64(w_rbp));
|
||||
insts.push(Inst::ret());
|
||||
}
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn frame_size(&self) -> u32 {
|
||||
self.frame_size_bytes
|
||||
.expect("frame size not computed before prologue generation") as u32
|
||||
}
|
||||
|
||||
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
(RegClass::I64, _) => 1,
|
||||
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
|
||||
(RegClass::V128, _) => 2,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
|
@ -1,420 +0,0 @@
|
|||
//! Instruction operand sub-components (aka "parts"): definitions and printing.
|
||||
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
|
||||
|
||||
use crate::machinst::*;
|
||||
|
||||
use super::regs::show_ireg_sized;
|
||||
|
||||
/// A Memory Address. These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
    /// Immediate sign-extended and a Register.
    IR { simm32: u32, base: Reg },

    /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
    IRRS {
        simm32: u32,
        base: Reg,
        index: Reg,
        shift: u8, /* 0 .. 3 only */
    },
}
|
||||
|
||||
impl Addr {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
Self::IR { simm32, base }
|
||||
}
|
||||
|
||||
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
debug_assert!(index.get_class() == RegClass::I64);
|
||||
debug_assert!(shift <= 3);
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
Addr::IR { simm32: _, base } => {
|
||||
collector.add_use(*base);
|
||||
}
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
base,
|
||||
index,
|
||||
shift: _,
|
||||
} => {
|
||||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Addr {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
} => format!(
|
||||
"{}({},{},{})",
|
||||
*simm32 as i32,
|
||||
base.show_rru(mb_rru),
|
||||
index.show_rru(mb_rru),
|
||||
1 << shift
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
|
||||
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
|
||||
/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
|
||||
/// `simm32` is its sign-extension out to 64 bits.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RMI {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
I { simm32: u32 },
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
// Constructors
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> RMI {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RMI::R { reg }
|
||||
}
|
||||
pub(crate) fn mem(addr: Addr) -> RMI {
|
||||
RMI::M { addr }
|
||||
}
|
||||
pub(crate) fn imm(simm32: u32) -> RMI {
|
||||
RMI::I { simm32 }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RMI::R { reg } => collector.add_use(*reg),
|
||||
RMI::M { addr } => addr.get_regs_as_uses(collector),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RMI {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RMI::M { addr } => addr.show_rru(mb_rru),
|
||||
RMI::I { simm32 } => format!("${}", *simm32 as i32),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32 or 64 bit value.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RM {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
}
|
||||
|
||||
impl RM {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> Self {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RM::R { reg }
|
||||
}
|
||||
|
||||
pub(crate) fn mem(addr: Addr) -> Self {
|
||||
RM::M { addr }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RM::R { reg } => collector.add_use(*reg),
|
||||
RM::M { addr } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RM {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RM::M { addr } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum RMI_R_Op {
|
||||
Add,
|
||||
Sub,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
||||
Mul,
|
||||
}
|
||||
|
||||
impl RMI_R_Op {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
RMI_R_Op::Add => "add".to_string(),
|
||||
RMI_R_Op::Sub => "sub".to_string(),
|
||||
RMI_R_Op::And => "and".to_string(),
|
||||
RMI_R_Op::Or => "or".to_string(),
|
||||
RMI_R_Op::Xor => "xor".to_string(),
|
||||
RMI_R_Op::Mul => "imul".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RMI_R_Op {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate ways of extending (widening) a value, using the Intel naming:
|
||||
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum ExtMode {
|
||||
/// Byte -> Longword.
|
||||
BL,
|
||||
/// Byte -> Quadword.
|
||||
BQ,
|
||||
/// Word -> Longword.
|
||||
WL,
|
||||
/// Word -> Quadword.
|
||||
WQ,
|
||||
/// Longword -> Quadword.
|
||||
LQ,
|
||||
}
|
||||
|
||||
impl ExtMode {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ExtMode::BL => "bl".to_string(),
|
||||
ExtMode::BQ => "bq".to_string(),
|
||||
ExtMode::WL => "wl".to_string(),
|
||||
ExtMode::WQ => "wq".to_string(),
|
||||
ExtMode::LQ => "lq".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn dst_size(&self) -> u8 {
|
||||
match self {
|
||||
ExtMode::BL => 4,
|
||||
ExtMode::BQ => 8,
|
||||
ExtMode::WL => 4,
|
||||
ExtMode::WQ => 8,
|
||||
ExtMode::LQ => 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ExtMode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
|
||||
#[derive(Clone)]
|
||||
pub enum ShiftKind {
|
||||
Left,
|
||||
RightZ,
|
||||
RightS,
|
||||
}
|
||||
|
||||
impl ShiftKind {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ShiftKind::Left => "shl".to_string(),
|
||||
ShiftKind::RightZ => "shr".to_string(),
|
||||
ShiftKind::RightS => "sar".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ShiftKind {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate condition code tests. Not all are represented since not all are useful in
|
||||
/// compiler-generated code.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(u8)]
|
||||
pub enum CC {
|
||||
/// overflow
|
||||
O = 0,
|
||||
/// no overflow
|
||||
NO = 1,
|
||||
|
||||
/// < unsigned
|
||||
B = 2,
|
||||
/// >= unsigned
|
||||
NB = 3,
|
||||
|
||||
/// zero
|
||||
Z = 4,
|
||||
/// not-zero
|
||||
NZ = 5,
|
||||
|
||||
/// <= unsigned
|
||||
BE = 6,
|
||||
/// > unsigned
|
||||
NBE = 7,
|
||||
|
||||
/// negative
|
||||
S = 8,
|
||||
/// not-negative
|
||||
NS = 9,
|
||||
|
||||
/// < signed
|
||||
L = 12,
|
||||
/// >= signed
|
||||
NL = 13,
|
||||
|
||||
/// <= signed
|
||||
LE = 14,
|
||||
/// > signed
|
||||
NLE = 15,
|
||||
}
|
||||
|
||||
impl CC {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
CC::O => "o".to_string(),
|
||||
CC::NO => "no".to_string(),
|
||||
CC::B => "b".to_string(),
|
||||
CC::NB => "nb".to_string(),
|
||||
CC::Z => "z".to_string(),
|
||||
CC::NZ => "nz".to_string(),
|
||||
CC::BE => "be".to_string(),
|
||||
CC::NBE => "nbe".to_string(),
|
||||
CC::S => "s".to_string(),
|
||||
CC::NS => "ns".to_string(),
|
||||
CC::L => "l".to_string(),
|
||||
CC::NL => "nl".to_string(),
|
||||
CC::LE => "le".to_string(),
|
||||
CC::NLE => "nle".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn invert(&self) -> CC {
|
||||
match self {
|
||||
CC::O => CC::NO,
|
||||
CC::NO => CC::O,
|
||||
|
||||
CC::B => CC::NB,
|
||||
CC::NB => CC::B,
|
||||
|
||||
CC::Z => CC::NZ,
|
||||
CC::NZ => CC::Z,
|
||||
|
||||
CC::BE => CC::NBE,
|
||||
CC::NBE => CC::BE,
|
||||
|
||||
CC::S => CC::NS,
|
||||
CC::NS => CC::S,
|
||||
|
||||
CC::L => CC::NL,
|
||||
CC::NL => CC::L,
|
||||
|
||||
CC::LE => CC::NLE,
|
||||
CC::NLE => CC::LE,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_enc(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for CC {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a MachLabel.
|
||||
Label(MachLabel),
|
||||
|
||||
/// A resolved reference to another instruction, in bytes.
|
||||
ResolvedOffset(isize),
|
||||
}
|
||||
|
||||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
BranchTarget::Label(l) => format!("{:?}", l),
|
||||
BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Get the label.
|
||||
pub fn as_label(&self) -> Option<MachLabel> {
|
||||
match self {
|
||||
&BranchTarget::Label(l) => Some(l),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a signed 32 bit byte offset. This returns the
|
||||
/// offset in bytes between the first byte of the source and the first
|
||||
/// byte of the target. It does not take into account the Intel-specific
|
||||
/// rule that a branch offset is encoded as relative to the start of the
|
||||
/// following instruction. That is a problem for the emitter to deal
|
||||
/// with. If a label, returns zero.
|
||||
pub fn as_offset32_or_zero(&self) -> i32 {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => {
|
||||
// Leave a bit of slack so that the emitter is guaranteed to
|
||||
// be able to add the length of the jump instruction encoding
|
||||
// to this value and still have a value in signed-32 range.
|
||||
assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
|
||||
off as i32
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,892 +0,0 @@
|
|||
use regalloc::{Reg, RegClass};
|
||||
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
fn low8willSXto64(x: u32) -> bool {
    let xs = (x as i32) as i64;
    xs == ((xs << 56) >> 56)
}

fn low8willSXto32(x: u32) -> bool {
    let xs = x as i32;
    xs == ((xs << 24) >> 24)
}
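// Aside (not part of this diff): the shift trick above asks whether the value
// round-trips through i8, i.e. whether its low 8 bits sign-extend back to the
// original. An equivalent formulation, with an illustrative name:
fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let xs = x as i32;
    xs == (xs as i8) as i32
}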
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: emission
|
||||
|
||||
// For all of the routines that take both a memory-or-reg operand (sometimes
|
||||
// called "E" in the Intel documentation) and a reg-only operand ("G" in
|
||||
// Intelese), the order is always G first, then E.
|
||||
//
|
||||
// "enc" in the following means "hardware register encoding number".
|
||||
|
||||
#[inline(always)]
|
||||
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
|
||||
debug_assert!(m0d < 4);
|
||||
debug_assert!(encRegG < 8);
|
||||
debug_assert!(rmE < 8);
|
||||
((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
|
||||
debug_assert!(shift < 4);
|
||||
debug_assert!(encIndex < 8);
|
||||
debug_assert!(encBase < 8);
|
||||
((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
|
||||
}
|
||||
|
||||
/// Get the encoding number from something which we sincerely hope is a real
|
||||
/// register of class I64.
|
||||
#[inline(always)]
|
||||
fn iregEnc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
// F_*: these flags describe special handling of the insn to be generated. Be
|
||||
// careful with these. It is easy to create nonsensical combinations.
|
||||
const F_NONE: u32 = 0;
|
||||
|
||||
/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
|
||||
const F_RETAIN_REDUNDANT_REX: u32 = 1;
|
||||
|
||||
/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
|
||||
/// indicating a 64-bit operation.
|
||||
const F_CLEAR_REX_W: u32 = 2;
|
||||
|
||||
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
|
||||
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
|
||||
const F_PREFIX_66: u32 = 4;
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a register `encG` and a memory
|
||||
/// address `memE`, create and emit, first the REX prefix, then caller-supplied
|
||||
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
|
||||
/// optionally, a SIB byte, and finally optionally an immediate that will be
|
||||
/// derived from the `memE` operand. For most instructions up to and including
|
||||
/// SSE4.2, that will be the whole instruction.
|
||||
///
|
||||
/// The opcodes are written bigendianly for the convenience of callers. For
|
||||
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
|
||||
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware
|
||||
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
|
||||
/// By default, the REX prefix will indicate a 64-bit operation and will be
|
||||
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
|
||||
/// REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    memE: &Addr,
    flags: u32,
) {
    // General comment for this function: the registers in `memE` must be
    // 64-bit integer registers, because they are part of an address
    // expression. But `encG` can be derived from a register of any class.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override, if requested. This indicates a 16-bit
    // operation.
    if prefix66 {
        sink.put1(0x66);
    }
    match memE {
        Addr::IR { simm32, base: regE } => {
            // First, cook up the REX byte. This is easy.
            let encE = iregEnc(*regE);
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = 0;
            let b = (encE >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // Now the opcode(s). These include any other prefixes the caller
            // hands to us.
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // Now the mod/rm and associated immediates. This is
            // significantly complicated due to the multiple special cases.
            if *simm32 == 0
                && encE != regs::ENC_RSP
                && encE != regs::ENC_RBP
                && encE != regs::ENC_R12
                && encE != regs::ENC_R13
            {
                // FIXME JRS 2020Feb11: those four tests can surely be
                // replaced by a single mask-and-compare check. We should do
                // that because this routine is likely to be hot.
                sink.put1(mkModRegRM(0, encG & 7, encE & 7));
            } else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
                sink.put1(mkModRegRM(0, encG & 7, 4));
                sink.put1(0x24);
            } else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(1, encG & 7, encE & 7));
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(2, encG & 7, encE & 7));
                sink.put4(*simm32);
            } else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
                // REX.B distinguishes RSP from R12
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(0x24);
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
                //.. wait for test case for RSP case
                // REX.B distinguishes RSP from R12
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(0x24);
                sink.put4(*simm32);
            } else {
                unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
            }
        }
        // Bizarrely, the IRRS case is much simpler.
        Addr::IRRS {
            simm32,
            base: regBase,
            index: regIndex,
            shift,
        } => {
            let encBase = iregEnc(*regBase);
            let encIndex = iregEnc(*regIndex);
            // The rex byte
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = (encIndex >> 3) & 1;
            let b = (encBase >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // All other prefixes and opcodes
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // modrm, SIB, immediates
            if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put1(*simm32 as u8);
            } else if encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put4(*simm32);
            } else {
                panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
            }
        }
    }
}
/// This is the core 'emit' function for instructions that do not reference
/// memory.
///
/// This is conceptually the same as
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    encE: u8,
    flags: u32,
) {
    // EncG and EncE can be derived from registers of any class, and they
    // don't even have to be from the same class. For example, for an
    // integer-to-FP conversion insn, one might be RegClass::I64 and the other
    // RegClass::V128.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override
    if prefix66 {
        sink.put1(0x66);
    }
    // The rex byte
    let w = if clearRexW { 0 } else { 1 };
    let r = (encG >> 3) & 1;
    let x = 0;
    let b = (encE >> 3) & 1;
    let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
    if rex != 0x40 || retainRedundant {
        sink.put1(rex);
    }
    // All other prefixes and opcodes
    while numOpcodes > 0 {
        numOpcodes -= 1;
        sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
    }
    // Now the mod/rm byte. The instruction we're generating doesn't access
    // memory, so there is no SIB byte or immediate -- we're done.
    sink.put1(mkModRegRM(3, encG & 7, encE & 7));
}
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.

fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    memE: &Addr,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
    let encG = iregEnc(regG);
    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}

fn emit_REX_OPCODES_MODRM_regG_regE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    regE: Reg,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
    let encG = iregEnc(regG);
    let encE = iregEnc(regE);
    emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
}
/// Write a suitable number of bytes of `simm32` to the sink, as determined by `size`.
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
    match size {
        // Even an 8-byte operand takes at most a 4-byte immediate here; the
        // processor sign-extends it to 64 bits.
        8 | 4 => sink.put4(simm32),
        2 => sink.put2(simm32 as u16),
        1 => sink.put1(simm32 as u8),
        _ => panic!("x64::Inst::emit_simm: unreachable"),
    }
}
/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
/// instructions without also adding tests for those improved encodings. That
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
/// code.
///
/// For all instructions, make sure to have test coverage for all of the
/// following situations. Do this by creating the cross product resulting from
/// applying the following rules to each operand:
///
/// (1) for any insn that mentions a register: one test using a register from
///     the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
///     using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
///     This helps detect incorrect REX prefix construction.
///
/// (2) for any insn that mentions a byte register: one test for each of the
///     four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
///     [r8b .. r11b] and [r12b .. r15b]. This checks that
///     apparently-redundant REX prefixes are retained when required.
///
/// (3) for any insn that contains an immediate field, check the following
///     cases: field is zero, field is in simm8 range (-128 .. 127), field is
///     in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
///     instructions that require a 32-bit immediate have a short-form encoding
///     when the imm is in simm8 range.
///
/// Rules (1), (2) and (3) don't apply to registers within address expressions
/// (`Addr`s). Those are already pretty well tested, and the registers in them
/// don't have any effect on the containing instruction (apart from possibly
/// requiring REX prefix bits).
///
/// When choosing registers for a test, avoid using registers with the same
/// offset within a given group. For example, don't use rax and r8, since they
/// both have the lowest 3 bits as 000, and so the test won't detect errors
/// where those 3-bit register sub-fields are confused by the emitter. Instead
/// use (e.g.) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (e.g.) cl
/// and bpl, since they have the same offset in their group; use instead (e.g.) cl
/// and sil.
///
/// For all instructions, also add a test that uses only low-half registers
/// (rax .. rdi, xmm0 .. xmm7) etc., so as to check that any redundant REX
/// prefixes are correctly omitted. This low-half restriction must apply to
/// _all_ registers in the insn, even those in address expressions.
///
/// Following these rules creates a large number of test cases, but it's the
/// only way to make the emitter reliable.
///
/// Known possible improvements:
///
/// * there's a shorter encoding for shl/shr/sar with a shift amount of 1. (Do we
///   care?)
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
match inst {
|
||||
Inst::Nop { len: 0 } => {}
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
if *op == RMI_R_Op::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
*regE,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x6B } else { 0x69 };
|
||||
// Yes, really, regG twice.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (opcode_R, opcode_M, subopcode_I) = match op {
|
||||
RMI_R_Op::Add => (0x01, 0x03, 0),
|
||||
RMI_R_Op::Sub => (0x29, 0x2B, 5),
|
||||
RMI_R_Op::And => (0x21, 0x23, 4),
|
||||
RMI_R_Op::Or => (0x09, 0x0B, 1),
|
||||
RMI_R_Op::Xor => (0x31, 0x33, 6),
|
||||
RMI_R_Op::Mul => panic!("unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
// Note. The arguments .. regE .. regG .. sequence
|
||||
// here is the opposite of what is expected. I'm not
|
||||
// sure why this is. But I am fairly sure that the
|
||||
// arg order could be switched back to the expected
|
||||
// .. regG .. regE .. if opcode_rr is also switched
|
||||
// over to the "other" basic integer opcode (viz, the
|
||||
// R/RM vs RM/R duality). However, that would mean
|
||||
// that the test results won't be in accordance with
|
||||
// the GNU as reference output. In other words, the
|
||||
// inversion exists as a result of using GNU as as a
|
||||
// gold standard.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode_R,
|
||||
1,
|
||||
*regE,
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
// NB: if this is ever extended to handle byte size
|
||||
// ops, be sure to retain redundant REX prefixes.
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
opcode_M,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x83 } else { 0x81 };
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(regG.to_reg());
|
||||
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if *dst_is_64 {
|
||||
// FIXME JRS 2020Feb10: also use the 32-bit case here when
|
||||
// possible
|
||||
sink.put1(0x48 | ((encDst >> 3) & 1));
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put8(*simm64);
|
||||
} else {
|
||||
if ((encDst >> 3) & 1) == 1 {
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put4(*simm64 as u32);
|
||||
}
|
||||
}
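// A note added for readability (not in the original source): the 64-bit path above is
// the REX.W + B8+rd imm64 form (printed as `movabsq` by the code further below), while
// the 32-bit path is the plain B8+rd imm32 form (`movl`), with an optional 0x41 REX.B
// byte when the destination is one of r8..r15.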
|
||||
Inst::Mov_R_R { is_64, src, dst } => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
|
||||
}
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVZBQ is (REX.W==1) 0F B6 /r
|
||||
// I'm not sure why the Intel manual offers different
|
||||
// encodings for MOVZBQ than for MOVZBL. AIUI they should
|
||||
// achieve the same, since MOVZBL is just going to zero out
|
||||
// the upper half of the destination anyway.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVZWL is (REX.W==0) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVZWQ is (REX.W==1) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// This is just a standard 32 bit load, and we rely on the
|
||||
// default zero-extension rule to perform the extension.
|
||||
// MOV r/m32, r32 is (REX.W==0) 8B /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
|
||||
}
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVSBQ is (REX.W==1) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVSWL is (REX.W==0) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVSWQ is (REX.W==1) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// MOVSLQ is (REX.W==1) 63 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x63,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_M { size, src, addr } => {
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
// redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encSrc = iregEnc(*src);
|
||||
let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
|
||||
F_RETAIN_REDUNDANT_REX
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x88,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | retainRedundantRex,
|
||||
)
|
||||
}
|
||||
2 => {
|
||||
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | F_PREFIX_66,
|
||||
)
|
||||
}
|
||||
4 => {
|
||||
// MOV r32, r/m32 is (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
8 => {
|
||||
// MOV r64, r/m64 is (REX.W==1) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
|
||||
}
|
||||
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
|
||||
}
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
};
|
||||
match num_bits {
|
||||
None => {
|
||||
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
|
||||
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xD3,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
}
|
||||
Some(num_bits) => {
|
||||
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
|
||||
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
|
||||
// When the shift amount is 1, there's an even shorter encoding, but we don't
|
||||
// bother with that nicety here.
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xC1,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
sink.put1(*num_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let mut retainRedundantRex = 0;
|
||||
if *size == 1 {
|
||||
// Here, a redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encG = iregEnc(*regG);
|
||||
if encG >= 4 && encG <= 7 {
|
||||
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
let mut flags = match size {
|
||||
8 => F_NONE,
|
||||
4 => F_CLEAR_REX_W,
|
||||
2 => F_CLEAR_REX_W | F_PREFIX_66,
|
||||
1 => F_CLEAR_REX_W | retainRedundantRex,
|
||||
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
let opcode = if *size == 1 { 0x38 } else { 0x39 };
|
||||
if *size == 1 {
|
||||
// We also need to check whether the E register forces
|
||||
// the use of a redundant REX.
|
||||
let encE = iregEnc(*regE);
|
||||
if encE >= 4 && encE <= 7 {
|
||||
flags |= F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
// Same comment re swapped args as for Alu_RMI_R.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
// FIXME JRS 2020Feb11: there are shorter encodings for
|
||||
// cmp $imm, rax/eax/ax/al.
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if *size == 1 {
|
||||
0x80
|
||||
} else if useImm8 {
|
||||
0x83
|
||||
} else {
|
||||
0x81
|
||||
};
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(*regG);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink, opcode, 1, 7, /*subopcode*/
|
||||
encG, flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
match src {
|
||||
RMI::R { reg } => {
|
||||
let encReg = iregEnc(*reg);
|
||||
let rex = 0x40 | ((encReg >> 3) & 1);
|
||||
if rex != 0x40 {
|
||||
sink.put1(rex);
|
||||
}
|
||||
sink.put1(0x50 | (encReg & 7));
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
6, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
if low8willSXto64(*simm32) {
|
||||
sink.put1(0x6A);
|
||||
sink.put1(*simm32 as u8);
|
||||
} else {
|
||||
sink.put1(0x68);
|
||||
sink.put4(*simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if encDst >= 8 {
|
||||
// 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
|
||||
// here.
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0x58 + (encDst & 7));
|
||||
}
|
||||
//
|
||||
// ** Inst::CallKnown
|
||||
//
|
||||
Inst::CallUnknown { dest } => {
|
||||
match dest {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Ret {} => sink.put1(0xC3),
|
||||
|
||||
Inst::JmpKnown { dest } => {
|
||||
let disp = dest.as_offset32_or_zero() - 5;
|
||||
let disp = disp as u32;
|
||||
let br_start = sink.cur_offset();
|
||||
let br_disp_off = br_start + 1;
|
||||
let br_end = br_start + 5;
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(br_start, br_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(disp);
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => {
|
||||
// Conditional part.
|
||||
|
||||
// This insn is 6 bytes long. Currently `offset` is relative to
|
||||
// the start of this insn, but the Intel encoding requires it to
|
||||
// be relative to the start of the next instruction. Hence the
|
||||
// adjustment.
|
||||
let taken_disp = taken.as_offset32_or_zero() - 6;
|
||||
let taken_disp = taken_disp as u32;
|
||||
let cond_start = sink.cur_offset();
|
||||
let cond_disp_off = cond_start + 2;
|
||||
let cond_end = cond_start + 6;
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
|
||||
let inverted: [u8; 6] =
|
||||
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
|
||||
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
|
||||
}
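// A reading note added here (not in the original source): the `inverted` byte sequence
// appears to be the same Jcc instruction with the condition flipped and a disp32 of
// 0xFFFF_FFFA (-6), i.e. a 6-byte conditional jump back to its own start, which
// MachBuffer can substitute when it flips the sense of this branch during its
// branch-simplification pass.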
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0x80 + cc.get_enc());
|
||||
sink.put4(taken_disp);
|
||||
|
||||
// Unconditional part.
|
||||
|
||||
let nt_disp = not_taken.as_offset32_or_zero() - 5;
|
||||
let nt_disp = nt_disp as u32;
|
||||
let uncond_start = sink.cur_offset();
|
||||
let uncond_disp_off = uncond_start + 1;
|
||||
let uncond_end = uncond_start + 5;
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(uncond_start, uncond_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(nt_disp);
|
||||
}
|
||||
Inst::JmpUnknown { target } => {
|
||||
match target {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
|
||||
}
|
||||
}
|
The diff for this file is not shown in full because of its size.
@ -1,905 +0,0 @@
//! This module defines x86_64-specific machine instruction types.

#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

use core::convert::TryFrom;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};

use regalloc::RegUsageCollector;
use regalloc::Set;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};

use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{settings, CodegenError, CodegenResult};

pub mod args;
mod emit;
#[cfg(test)]
mod emit_tests;
pub mod regs;

use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};
|
||||
|
||||
//=============================================================================
|
||||
// Instructions (top level): definition
|
||||
|
||||
// Don't build these directly. Instead use the Inst:: functions to create them.
|
||||
|
||||
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Inst {
|
||||
/// nops of various sizes, including zero
|
||||
Nop { len: u8 },
|
||||
|
||||
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
Alu_RMI_R {
|
||||
is_64: bool,
|
||||
op: RMI_R_Op,
|
||||
src: RMI,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// (imm32 imm64) reg.
|
||||
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32
|
||||
Imm_R {
|
||||
dst_is_64: bool,
|
||||
simm64: u64,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (64 32) reg reg
|
||||
Mov_R_R {
|
||||
is_64: bool,
|
||||
src: Reg,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
|
||||
/// Note that the lq variant doesn't really exist since the default
|
||||
/// zero-extend rule makes it unnecessary. For that case we emit the
|
||||
/// equivalent "movl AM, reg32".
|
||||
MovZX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
|
||||
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
|
||||
|
||||
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
|
||||
MovSX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (b w l q) reg addr (good for all integer stores)
|
||||
Mov_R_M {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
},
|
||||
|
||||
/// (shl shr sar) (l q) imm reg
|
||||
Shift_R {
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// cmp (b w l q) (reg addr imm) reg
|
||||
Cmp_RMI_R {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
},
|
||||
|
||||
/// pushq (reg addr imm)
|
||||
Push64 { src: RMI },
|
||||
|
||||
/// popq reg
|
||||
Pop64 { dst: Writable<Reg> },
|
||||
|
||||
/// call simm32
|
||||
CallKnown {
|
||||
dest: ExternalName,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
/// callq (reg mem)
|
||||
CallUnknown {
|
||||
dest: RM,
|
||||
//uses: Set<Reg>,
|
||||
//defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
// ---- branches (exactly one must appear at end of BB) ----
|
||||
/// ret
|
||||
Ret,
|
||||
|
||||
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
|
||||
/// inserted there.
|
||||
EpiloguePlaceholder,
|
||||
|
||||
/// jmp simm32
|
||||
JmpKnown { dest: BranchTarget },
|
||||
|
||||
/// jcond cond target target
|
||||
/// Symmetrical two-way conditional branch.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it
|
||||
/// as appropriate.
|
||||
JmpCondSymm {
|
||||
cc: CC,
|
||||
taken: BranchTarget,
|
||||
not_taken: BranchTarget,
|
||||
},
|
||||
|
||||
/// jmpq (reg mem)
|
||||
JmpUnknown { target: RM },
|
||||
}
|
||||
|
||||
// Handy constructors for Insts.

// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
pub(crate) fn low32willSXto64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}
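// Informal examples added for clarity (not in the original source):
//   low32willSXto64(0x0000_0000_7FFF_FFFF) == true   // already a sign-extended i32
//   low32willSXto64(0xFFFF_FFFF_8000_0000) == true   // the sign-extension of i32::MIN
//   low32willSXto64(0x0000_0001_0000_0000) == false  // bit 32 is set, low 32 bits are not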
|
||||
|
||||
impl Inst {
|
||||
pub(crate) fn nop(len: u8) -> Self {
|
||||
debug_assert!(len <= 16);
|
||||
Self::Nop { len }
|
||||
}
|
||||
|
||||
pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Self::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
if !dst_is_64 {
|
||||
debug_assert!(low32willSXto64(simm64));
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov_R_R { is_64, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovZX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov64_M_R { addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_m(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
Inst::Mov_R_M { size, src, addr }
|
||||
}
|
||||
|
||||
pub(crate) fn shift_r(
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
) -> Inst {
|
||||
debug_assert!(if let Some(num_bits) = num_bits {
|
||||
num_bits < if is_64 { 64 } else { 32 }
|
||||
} else {
|
||||
true
|
||||
});
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn cmp_rmi_r(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(dst.get_class() == RegClass::I64);
|
||||
Inst::Cmp_RMI_R { size, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn push64(src: RMI) -> Inst {
|
||||
Inst::Push64 { src }
|
||||
}
|
||||
|
||||
pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
|
||||
Inst::Pop64 { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn call_unknown(dest: RM) -> Inst {
|
||||
Inst::CallUnknown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn ret() -> Inst {
|
||||
Inst::Ret
|
||||
}
|
||||
|
||||
pub(crate) fn epilogue_placeholder() -> Inst {
|
||||
Inst::EpiloguePlaceholder
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
|
||||
Inst::JmpKnown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_unknown(target: RM) -> Inst {
|
||||
Inst::JmpUnknown { target }
|
||||
}
|
||||
}
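// Illustrative only (not part of the original source): a couple of instructions built
// with the constructors above, using register helpers from the `regs` module shown below.
//   let add = Inst::alu_rmi_r(true, RMI_R_Op::Add,
//                             RMI::R { reg: regs::rcx() },
//                             Writable::from_reg(regs::rax()));
//   let ret = Inst::ret();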
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: printing
|
||||
|
||||
impl ShowWithRRU for Inst {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
fn ljustify(s: String) -> String {
|
||||
let w = 7;
|
||||
if s.len() >= w {
|
||||
s
|
||||
} else {
|
||||
let need = usize::min(w, w - s.len());
|
||||
s + &format!("{nil: <width$}", nil = "", width = need)
|
||||
}
|
||||
}
|
||||
|
||||
fn ljustify2(s1: String, s2: String) -> String {
|
||||
ljustify(s1 + &s2)
|
||||
}
|
||||
|
||||
fn suffixLQ(is_64: bool) -> String {
|
||||
(if is_64 { "q" } else { "l" }).to_string()
|
||||
}
|
||||
|
||||
fn sizeLQ(is_64: bool) -> u8 {
|
||||
if is_64 {
|
||||
8
|
||||
} else {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
fn suffixBWLQ(size: u8) -> String {
|
||||
match size {
|
||||
1 => "b".to_string(),
|
||||
2 => "w".to_string(),
|
||||
4 => "l".to_string(),
|
||||
8 => "q".to_string(),
|
||||
_ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
|
||||
}
|
||||
}
|
||||
|
||||
match self {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
} => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2(op.to_string(), suffixLQ(*is_64)),
|
||||
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
|
||||
),
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
if *dst_is_64 {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movabsq".to_string()),
|
||||
*simm64 as i64,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
(*simm64 as u32) as i32,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_R { is_64, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
if *extMode == ExtMode::LQ {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movz".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movq".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movs".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
),
|
||||
Inst::Mov_R_M { size, src, addr } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixBWLQ(*size)),
|
||||
show_ireg_sized(*src, mb_rru, *size),
|
||||
addr.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => match num_bits {
|
||||
None => format!(
|
||||
"{} %cl, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
|
||||
Some(num_bits) => format!(
|
||||
"{} ${}, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
num_bits,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
},
|
||||
Inst::Cmp_RMI_R { size, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("cmp".to_string(), suffixBWLQ(*size)),
|
||||
src.show_rru_sized(mb_rru, *size),
|
||||
show_ireg_sized(*dst, mb_rru, *size)
|
||||
),
|
||||
Inst::Push64 { src } => {
|
||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
|
||||
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
|
||||
Inst::CallUnknown { dest } => format!(
|
||||
"{} *{}",
|
||||
ljustify("call".to_string()),
|
||||
dest.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Ret => "ret".to_string(),
|
||||
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
|
||||
Inst::JmpKnown { dest } => {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => format!(
|
||||
"{} taken={} not_taken={}",
|
||||
ljustify2("j".to_string(), cc.to_string()),
|
||||
taken.show_rru(mb_rru),
|
||||
not_taken.show_rru(mb_rru)
|
||||
),
|
||||
//
|
||||
Inst::JmpUnknown { target } => format!(
|
||||
"{} *{}",
|
||||
ljustify("jmp".to_string()),
|
||||
target.show_rru(mb_rru)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Temp hook for legacy printing machinery
|
||||
impl fmt::Debug for Inst {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
// Print the insn without a Universe :-(
|
||||
write!(fmt, "{}", self.show_rru(None))
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// This is a bit subtle. If some register is in the modified set, then it may not be in either
|
||||
// the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
|
||||
// regalloc.rs will "fix" this for us by removing the the modified set from the use and def
|
||||
// sets.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
src,
|
||||
dst,
|
||||
} => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
dst,
|
||||
} => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_R { is_64: _, src, dst } => {
|
||||
collector.add_use(*src);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_M { size: _, src, addr } => {
|
||||
collector.add_use(*src);
|
||||
addr.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
if num_bits.is_none() {
|
||||
collector.add_use(regs::rcx());
|
||||
}
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R { size: _, src, dst } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_use(*dst); // yes, really `add_use`
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(Writable::from_reg(regs::rsp()));
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {
|
||||
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
|
||||
unimplemented!();
|
||||
}
|
||||
Inst::CallUnknown { dest } => {
|
||||
dest.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.get_regs_as_uses(collector);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: map_regs
|
||||
|
||||
fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
|
||||
if r.is_virtual() {
|
||||
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
|
||||
*r = new;
|
||||
}
|
||||
}
|
||||
|
||||
fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
Addr::IR {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
} => map_use(map, base),
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
ref mut index,
|
||||
shift: _,
|
||||
} => {
|
||||
map_use(map, base);
|
||||
map_use(map, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RMI::R { ref mut reg } => map_use(map, reg),
|
||||
RMI::M { ref mut addr } => addr.map_uses(map),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RM {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RM::R { ref mut reg } => map_use(map, reg),
|
||||
RM::M { ref mut addr } => addr.map_uses(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
// Note this must be carefully synchronized with x64_get_regs.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
ref mut dst,
|
||||
} => map_def(mapper, dst),
|
||||
Inst::Mov_R_R {
|
||||
is_64: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov_R_M {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut addr,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
addr.map_uses(mapper);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits: _,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_use(mapper, dst);
|
||||
}
|
||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||
Inst::Pop64 { ref mut dst } => {
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {}
|
||||
Inst::CallUnknown { dest } => dest.map_uses(mapper),
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.apply_map(mapper);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: misc functions and external interface
|
||||
|
||||
impl MachInst for Inst {
|
||||
fn get_regs(&self, collector: &mut RegUsageCollector) {
|
||||
x64_get_regs(&self, collector)
|
||||
}
|
||||
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||
x64_map_regs(self, mapper);
|
||||
}
|
||||
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
|
||||
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
|
||||
// out the upper 32 bits of the destination. For example, we could
|
||||
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
|
||||
// %reg.
|
||||
match self {
|
||||
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_epilogue_placeholder(&self) -> bool {
|
||||
if let Self::EpiloguePlaceholder = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
|
||||
match self {
|
||||
// Interesting cases.
|
||||
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
|
||||
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
|
||||
&Self::JmpCondSymm {
|
||||
cc: _,
|
||||
taken,
|
||||
not_taken,
|
||||
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
|
||||
// All other cases are boring.
|
||||
_ => MachTerminator::None,
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
|
||||
let rc_dst = dst_reg.to_reg().get_class();
|
||||
let rc_src = src_reg.get_class();
|
||||
// If this isn't true, we have gone way off the rails.
|
||||
debug_assert!(rc_dst == rc_src);
|
||||
match rc_dst {
|
||||
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
|
||||
_ => panic!("gen_move(x64): unhandled regclass"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_zero_len_nop() -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_nop(_preferred_size: usize) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
|
||||
match ty {
|
||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
ty
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_jump(label: MachLabel) -> Inst {
|
||||
Inst::jmp_known(BranchTarget::Label(label))
|
||||
}
|
||||
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
|
||||
let mut ret = SmallVec::new();
|
||||
let is64 = value > 0xffff_ffff;
|
||||
ret.push(Inst::imm_r(is64, value, to_reg));
|
||||
ret
|
||||
}
|
||||
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse {
|
||||
create_reg_universe_systemv(flags)
|
||||
}
|
||||
|
||||
fn worst_case_size() -> CodeOffset {
|
||||
15
|
||||
}
|
||||
|
||||
type LabelUse = LabelUse;
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = ();
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
|
||||
emit::emit(self, sink);
|
||||
}
|
||||
}
|
||||
|
||||
/// A label-use (internal relocation) in generated code.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum LabelUse {
|
||||
/// A 32-bit offset from location of relocation itself, added to the
|
||||
/// existing value at that location.
|
||||
Rel32,
|
||||
}
|
||||
|
||||
impl MachInstLabelUse for LabelUse {
|
||||
const ALIGN: CodeOffset = 1;
|
||||
|
||||
fn max_pos_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x7fff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_neg_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x8000_0000,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 4,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
|
||||
let value = i32::try_from(label_offset)
|
||||
.unwrap()
|
||||
.wrapping_sub(i32::try_from(use_offset).unwrap())
|
||||
.wrapping_add(addend);
|
||||
buffer.copy_from_slice(&value.to_le_bytes()[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn supports_veneer(self) -> bool {
|
||||
match self {
|
||||
LabelUse::Rel32 => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn veneer_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
panic!("Veneer not supported for Rel32 label-use.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,261 +0,0 @@
|
|||
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use std::string::String;
|
||||
|
||||
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
|
||||
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::settings;
|
||||
|
||||
// Hardware encodings for a few registers.
|
||||
|
||||
pub const ENC_RBX: u8 = 3;
|
||||
pub const ENC_RSP: u8 = 4;
|
||||
pub const ENC_RBP: u8 = 5;
|
||||
pub const ENC_R12: u8 = 12;
|
||||
pub const ENC_R13: u8 = 13;
|
||||
pub const ENC_R14: u8 = 14;
|
||||
pub const ENC_R15: u8 = 15;
|
||||
|
||||
fn gpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::I64, enc, index)
|
||||
}
|
||||
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 0)
|
||||
}
|
||||
pub(crate) fn r13() -> Reg {
|
||||
gpr(ENC_R13, 1)
|
||||
}
|
||||
pub(crate) fn r14() -> Reg {
|
||||
gpr(ENC_R14, 2)
|
||||
}
|
||||
pub(crate) fn r15() -> Reg {
|
||||
gpr(ENC_R15, 3)
|
||||
}
|
||||
pub(crate) fn rbx() -> Reg {
|
||||
gpr(ENC_RBX, 4)
|
||||
}
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 5)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 6)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 7)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 8)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 9)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 10)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 11)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 12)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 13)
|
||||
}
|
||||
|
||||
fn fpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::V128, enc, index)
|
||||
}
|
||||
fn xmm0() -> Reg {
|
||||
fpr(0, 14)
|
||||
}
|
||||
fn xmm1() -> Reg {
|
||||
fpr(1, 15)
|
||||
}
|
||||
fn xmm2() -> Reg {
|
||||
fpr(2, 16)
|
||||
}
|
||||
fn xmm3() -> Reg {
|
||||
fpr(3, 17)
|
||||
}
|
||||
fn xmm4() -> Reg {
|
||||
fpr(4, 18)
|
||||
}
|
||||
fn xmm5() -> Reg {
|
||||
fpr(5, 19)
|
||||
}
|
||||
fn xmm6() -> Reg {
|
||||
fpr(6, 20)
|
||||
}
|
||||
fn xmm7() -> Reg {
|
||||
fpr(7, 21)
|
||||
}
|
||||
fn xmm8() -> Reg {
    fpr(8, 22)
}

fn xmm9() -> Reg {
    fpr(9, 23)
}

fn xmm10() -> Reg {
    fpr(10, 24)
}

fn xmm11() -> Reg {
    fpr(11, 25)
}

fn xmm12() -> Reg {
    fpr(12, 26)
}

fn xmm13() -> Reg {
    fpr(13, 27)
}

fn xmm14() -> Reg {
    fpr(14, 28)
}

fn xmm15() -> Reg {
    fpr(15, 29)
}

pub(crate) fn rsp() -> Reg {
    gpr(ENC_RSP, 30)
}

pub(crate) fn rbp() -> Reg {
    gpr(ENC_RBP, 31)
}

/// Create the register universe for X64.
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = Vec::<(RealReg, String)>::new();
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Integer regs.
    let mut base = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));
    regs.push((r15().to_real_reg(), "%r15".into()));
    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(r12().get_index()),
    });

    // XMM registers
    base = regs.len();
    regs.push((xmm0().to_real_reg(), "%xmm0".into()));
    regs.push((xmm1().to_real_reg(), "%xmm1".into()));
    regs.push((xmm2().to_real_reg(), "%xmm2".into()));
    regs.push((xmm3().to_real_reg(), "%xmm3".into()));
    regs.push((xmm4().to_real_reg(), "%xmm4".into()));
    regs.push((xmm5().to_real_reg(), "%xmm5".into()));
    regs.push((xmm6().to_real_reg(), "%xmm6".into()));
    regs.push((xmm7().to_real_reg(), "%xmm7".into()));
    regs.push((xmm8().to_real_reg(), "%xmm8".into()));
    regs.push((xmm9().to_real_reg(), "%xmm9".into()));
    regs.push((xmm10().to_real_reg(), "%xmm10".into()));
    regs.push((xmm11().to_real_reg(), "%xmm11".into()));
    regs.push((xmm12().to_real_reg(), "%xmm12".into()));
    regs.push((xmm13().to_real_reg(), "%xmm13".into()));
    regs.push((xmm14().to_real_reg(), "%xmm14".into()));
    regs.push((xmm15().to_real_reg(), "%xmm15".into()));

    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(xmm15().get_index()),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));

    RealRegUniverse {
        regs,
        allocable,
        allocable_by_class,
    }
}

/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
/// smaller size (4, 2 or 1 bytes).
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
    let mut s = reg.show_rru(mb_rru);

    if reg.get_class() != RegClass::I64 || size == 8 {
        // We can't do any better.
        return s;
    }

    if reg.is_real() {
        // Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
        // describe diplomatically as "a kludge", but it's only debug code.
        let remapper = match s.as_str() {
            "%rax" => Some(["%eax", "%ax", "%al"]),
            "%rbx" => Some(["%ebx", "%bx", "%bl"]),
            "%rcx" => Some(["%ecx", "%cx", "%cl"]),
            "%rdx" => Some(["%edx", "%dx", "%dl"]),
            "%rsi" => Some(["%esi", "%si", "%sil"]),
            "%rdi" => Some(["%edi", "%di", "%dil"]),
            "%rbp" => Some(["%ebp", "%bp", "%bpl"]),
            "%rsp" => Some(["%esp", "%sp", "%spl"]),
            "%r8" => Some(["%r8d", "%r8w", "%r8b"]),
            "%r9" => Some(["%r9d", "%r9w", "%r9b"]),
            "%r10" => Some(["%r10d", "%r10w", "%r10b"]),
            "%r11" => Some(["%r11d", "%r11w", "%r11b"]),
            "%r12" => Some(["%r12d", "%r12w", "%r12b"]),
            "%r13" => Some(["%r13d", "%r13w", "%r13b"]),
            "%r14" => Some(["%r14d", "%r14w", "%r14b"]),
            "%r15" => Some(["%r15d", "%r15w", "%r15b"]),
            _ => None,
        };
        if let Some(smaller_names) = remapper {
            match size {
                4 => s = smaller_names[0].into(),
                2 => s = smaller_names[1].into(),
                1 => s = smaller_names[2].into(),
                _ => panic!("show_ireg_sized: real"),
            }
        }
    } else {
        // Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
        let suffix = match size {
            4 => "l",
            2 => "w",
            1 => "b",
            _ => panic!("show_ireg_sized: virtual"),
        };
        s = s + suffix;
    }

    s
}
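
// A minimal usage sketch (not part of the original patch; it assumes the register
// constructors and `create_reg_universe_systemv` above are in scope via `super::*`):
// real registers are renamed to their narrower architectural forms, while virtual
// registers just get a width suffix.
#[cfg(test)]
mod show_ireg_sized_sketch {
    use super::*;

    #[test]
    fn narrows_real_register_names() {
        let univ = create_reg_universe_systemv(&settings::Flags::new(settings::builder()));
        // 8-byte queries are returned unchanged; narrower ones use the remapping table.
        assert_eq!(show_ireg_sized(rax(), Some(&univ), 8), "%rax");
        assert_eq!(show_ireg_sized(rax(), Some(&univ), 2), "%ax");
        assert_eq!(show_ireg_sized(r10(), Some(&univ), 1), "%r10b");
    }
}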
@@ -1,343 +0,0 @@
//! Lowering rules for X64.

#![allow(dead_code)]
#![allow(non_snake_case)]

use regalloc::{Reg, Writable};

use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};

use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;

use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;

/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;

//=============================================================================
// Helpers for instruction lowering.

fn is_int_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

fn int_ty_to_is64(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 => false,
        types::I64 => true,
        _ => panic!("type {} is none of I8, I16, I32 or I64", ty),
    }
}

fn int_ty_to_sizeB(ty: Type) -> u8 {
    match ty {
        types::I8 => 1,
        types::I16 => 2,
        types::I32 => 4,
        types::I64 => 8,
        _ => panic!("ity_to_sizeB"),
    }
}

fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
    let inst_data = ctx.data(iri);
    if inst_data.opcode() == Opcode::Null {
        Some(0)
    } else {
        match inst_data {
            &InstructionData::UnaryImm { opcode: _, imm } => {
                // Only has Into for i64; we use u64 elsewhere, so we cast.
                let imm: i64 = imm.into();
                Some(imm as u64)
            }
            _ => None,
        }
    }
}

fn inst_condcode(data: &InstructionData) -> IntCC {
    match data {
        &InstructionData::IntCond { cond, .. }
        | &InstructionData::BranchIcmp { cond, .. }
        | &InstructionData::IntCompare { cond, .. }
        | &InstructionData::IntCondTrap { cond, .. }
        | &InstructionData::BranchInt { cond, .. }
        | &InstructionData::IntSelect { cond, .. }
        | &InstructionData::IntCompareImm { cond, .. } => cond,
        _ => panic!("inst_condcode(x64): unhandled: {:?}", data),
    }
}

fn intCC_to_x64_CC(cc: IntCC) -> CC {
    match cc {
        IntCC::Equal => CC::Z,
        IntCC::NotEqual => CC::NZ,
        IntCC::SignedGreaterThanOrEqual => CC::NL,
        IntCC::SignedGreaterThan => CC::NLE,
        IntCC::SignedLessThanOrEqual => CC::LE,
        IntCC::SignedLessThan => CC::L,
        IntCC::UnsignedGreaterThanOrEqual => CC::NB,
        IntCC::UnsignedGreaterThan => CC::NBE,
        IntCC::UnsignedLessThanOrEqual => CC::BE,
        IntCC::UnsignedLessThan => CC::B,
        IntCC::Overflow => CC::O,
        IntCC::NotOverflow => CC::NO,
    }
}

fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
    let inputs = ctx.get_input(iri, input);
    ctx.use_input_reg(inputs);
    inputs.reg
}

fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
    ctx.get_output(iri, output)
}
|
||||
|
||||
//=============================================================================
|
||||
// Top-level instruction lowering entry point, for one instruction.
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
|
||||
let op = ctx.data(iri).opcode();
|
||||
let ty = if ctx.num_outputs(iri) == 1 {
|
||||
Some(ctx.output_ty(iri, 0))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// This is all outstandingly feeble. TODO: much better!
|
||||
|
||||
match op {
|
||||
Opcode::Iconst => {
|
||||
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
|
||||
// Get exactly the bit pattern in 'w64' into the dest. No
|
||||
// monkeying with sign extension etc.
|
||||
let dstIs64 = w64 > 0xFFFF_FFFF;
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Iadd | Opcode::Isub => {
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
let regL = input_to_reg(ctx, iri, 0);
|
||||
let regR = input_to_reg(ctx, iri, 1);
|
||||
let is64 = int_ty_to_is64(ty.unwrap());
|
||||
let how = if op == Opcode::Iadd {
|
||||
RMI_R_Op::Add
|
||||
} else {
|
||||
RMI_R_Op::Sub
|
||||
};
|
||||
ctx.emit(Inst::mov_r_r(true, regL, regD));
|
||||
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
// TODO: implement imm shift value into insn
|
||||
let tySL = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
|
||||
let regSL = input_to_reg(ctx, iri, 0);
|
||||
let regSR = input_to_reg(ctx, iri, 1);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
|
||||
let how = match op {
|
||||
Opcode::Ishl => ShiftKind::Left,
|
||||
Opcode::Ushr => ShiftKind::RightZ,
|
||||
Opcode::Sshr => ShiftKind::RightS,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is64 = tyD == types::I64;
|
||||
let r_rcx = regs::rcx();
|
||||
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
|
||||
ctx.emit(Inst::mov_r_r(true, regSL, regD));
|
||||
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
|
||||
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
|
||||
} else {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Uextend | Opcode::Sextend => {
|
||||
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
|
||||
// don't accept a register source operand. They should be changed
|
||||
// so as to have _RM_R form.
|
||||
// TODO2: if the source operand is a load, incorporate that.
|
||||
let isZX = op == Opcode::Uextend;
|
||||
let tyS = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0);
|
||||
let regS = input_to_reg(ctx, iri, 0);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::mov_r_r(true, regS, regD));
|
||||
match (tyS, tyD, isZX) {
|
||||
(types::I8, types::I64, false) => {
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
|
||||
}
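// Worked example (illustrative, not in the original): with the low byte holding
// 0x80 (-128 as an i8), the `shl 56` produces 0x8000_0000_0000_0000 and the
// arithmetic `sar 56` then yields 0xFFFF_FFFF_FFFF_FF80, i.e. -128 as an i64,
// so the shift pair sign-extends the low 8 bits into the full register.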
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FallthroughReturn | Opcode::Return => {
|
||||
for i in 0..ctx.num_inputs(iri) {
|
||||
let src_reg = input_to_reg(ctx, iri, i);
|
||||
let retval_reg = ctx.retval(i);
|
||||
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
|
||||
}
|
||||
// N.B.: the Ret itself is generated by the ABI.
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
| Opcode::SdivImm
|
||||
| Opcode::UremImm
|
||||
| Opcode::SremImm
|
||||
| Opcode::IrsubImm
|
||||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
| Opcode::IsubIfbin
|
||||
| Opcode::IsubBout
|
||||
| Opcode::IsubIfbout
|
||||
| Opcode::IsubBorrow
|
||||
| Opcode::IsubIfborrow
|
||||
| Opcode::BandImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BxorImm
|
||||
| Opcode::RotlImm
|
||||
| Opcode::RotrImm
|
||||
| Opcode::IshlImm
|
||||
| Opcode::UshrImm
|
||||
| Opcode::SshrImm => {
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Lowering-backend trait implementation.
|
||||
|
||||
impl LowerBackend for X64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_insn_to_regs(ctx, ir_inst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
// A block should end with at most two branches. The first may be a
|
||||
// conditional branch; a conditional branch can be followed only by an
|
||||
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
// it may be an unconditional branch, a fallthrough, a return, or a
|
||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||
// verifier pass.
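// For example (illustration added here, not in the original): a block ending in
// `brz v0, block2 ; jump block3` arrives as a two-branch group, and is lowered
// below to a compare against zero plus a two-target conditional jump with
// taken = block2's label and not-taken = block3's label.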
|
||||
assert!(branches.len() <= 2);
|
||||
|
||||
let mut unimplemented = false;
|
||||
|
||||
if branches.len() == 2 {
|
||||
// Must be a conditional branch followed by an unconditional branch.
|
||||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
println!(
|
||||
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
op0, op1
|
||||
);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rS = input_to_reg(ctx, branches[0], 0);
|
||||
let cc = match op0 {
|
||||
Opcode::Brz => CC::Z,
|
||||
Opcode::Brnz => CC::NZ,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
Opcode::BrIcmp => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rSL = input_to_reg(ctx, branches[0], 0);
|
||||
let rSR = input_to_reg(ctx, branches[0], 1);
|
||||
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
// FIXME verify rSR vs rSL ordering
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
// TODO: Brif/icmp, Brff/icmp, jump tables
|
||||
_ => {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert!(branches.len() == 1);
|
||||
|
||||
// Must be an unconditional branch or trap.
|
||||
let op = ctx.data(branches[0]).opcode();
|
||||
match op {
|
||||
Opcode::Jump => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Fallthrough => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Trap => {
|
||||
unimplemented = true;
|
||||
}
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
if unimplemented {
|
||||
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,112 +0,0 @@
|
|||
//! x86_64 Instruction Set Architecture.
|
||||
|
||||
use alloc::boxed::Box;
|
||||
|
||||
use regalloc::RealRegUniverse;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{self, Flags};
|
||||
|
||||
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
|
||||
|
||||
mod abi;
|
||||
mod inst;
|
||||
mod lower;
|
||||
|
||||
/// An X64 backend.
|
||||
pub(crate) struct X64Backend {
|
||||
triple: Triple,
|
||||
flags: Flags,
|
||||
reg_universe: RealRegUniverse,
|
||||
}
|
||||
|
||||
impl X64Backend {
|
||||
/// Create a new X64 backend with the given (shared) flags.
|
||||
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
|
||||
let reg_universe = create_reg_universe_systemv(&flags);
|
||||
Self {
|
||||
triple,
|
||||
flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
|
||||
compile::compile::<Self>(&func, self, abi)
|
||||
}
|
||||
}
|
||||
|
||||
impl MachBackend for X64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let buffer = buffer.finish();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
}
|
||||
|
||||
fn flags(&self) -> &Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
"x64"
|
||||
}
|
||||
|
||||
fn triple(&self) -> Triple {
|
||||
self.triple.clone()
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
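// Concrete instance (added for illustration): an `add` that wraps, e.g.
// 0xFFFF_FFFF_FFFF_FFFF + 1 producing 0, sets CF = 1 on x86, and "unsigned >="
// is the IntCC that corresponds to that carry-flag condition here.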
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// underflow of a subtract (carry is borrow for subtract).
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `isa::Builder`.
|
||||
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
|
||||
let backend = X64Backend::new_with_flags(triple, flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
},
|
||||
}
|
||||
}
|
|
@ -6,6 +6,7 @@ use super::settings as isa_settings;
|
|||
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use crate::cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use crate::ir;
|
||||
use crate::ir::entities::StackSlot;
|
||||
use crate::ir::immediates::Imm64;
|
||||
use crate::ir::stackslot::{StackOffset, StackSize};
|
||||
use crate::ir::types;
|
||||
|
@ -18,6 +19,7 @@ use crate::regalloc::RegisterSet;
|
|||
use crate::result::CodegenResult;
|
||||
use crate::stack_layout::layout_stack;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::vec::Vec;
|
||||
use core::i32;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -42,7 +44,7 @@ static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
|||
///
|
||||
/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
|
||||
/// convention reserves spill space for parameters, you don’t have to use them as such"
|
||||
const WIN_SHADOW_STACK_SPACE: StackSize = 32;
|
||||
const WIN_SHADOW_STACK_SPACE: i32 = 32;
|
||||
|
||||
/// Stack alignment requirement for functions.
|
||||
///
|
||||
|
@ -70,7 +72,6 @@ struct Args {
|
|||
shared_flags: shared_settings::Flags,
|
||||
#[allow(dead_code)]
|
||||
isa_flags: isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
}
|
||||
|
||||
impl Args {
|
||||
|
@ -81,13 +82,12 @@ impl Args {
|
|||
call_conv: CallConv,
|
||||
shared_flags: &shared_settings::Flags,
|
||||
isa_flags: &isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
) -> Self {
|
||||
let offset = if call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
} else {
|
||||
0
|
||||
};
|
||||
} as u32;
|
||||
|
||||
Self {
|
||||
pointer_bytes: bits / 8,
|
||||
|
@ -101,7 +101,6 @@ impl Args {
|
|||
call_conv,
|
||||
shared_flags: shared_flags.clone(),
|
||||
isa_flags: isa_flags.clone(),
|
||||
assigning_returns,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -110,17 +109,6 @@ impl ArgAssigner for Args {
|
|||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
let ty = arg.value_type;
|
||||
|
||||
if ty.bits() > u16::from(self.pointer_bits) {
|
||||
if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
|
||||
// "Any argument that doesn't fit in 8 bytes, or isn't
|
||||
// 1, 2, 4, or 8 bytes, must be passed by reference"
|
||||
return ValueConversion::Pointer(self.pointer_type).into();
|
||||
} else if !ty.is_vector() && !ty.is_float() {
|
||||
// On SystemV large integers and booleans are broken down to fit in a register.
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
}
|
||||
|
||||
// Vectors should stay in vector registers unless SIMD is not enabled--then they are split
|
||||
if ty.is_vector() {
|
||||
if self.shared_flags.enable_simd() {
|
||||
|
@ -131,6 +119,11 @@ impl ArgAssigner for Args {
|
|||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
|
||||
match arg.extension {
|
||||
|
@ -212,7 +205,7 @@ pub fn legalize_signature(
|
|||
PointerWidth::U16 => panic!(),
|
||||
PointerWidth::U32 => {
|
||||
bits = 32;
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags);
|
||||
}
|
||||
PointerWidth::U64 => {
|
||||
bits = 64;
|
||||
|
@ -224,7 +217,6 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
} else {
|
||||
Args::new(
|
||||
|
@ -234,7 +226,6 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
@ -254,20 +245,26 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
true,
|
||||
);
|
||||
|
||||
// If we don't have enough available return registers
|
||||
// to fit all of the return values, we need to backtrack and start
|
||||
let sig_is_multi_return = sig.is_multi_return();
|
||||
|
||||
// If this is a multi-value return and we don't have enough available return
|
||||
// registers to fit all of the return values, we need to backtrack and start
|
||||
// assigning locations all over again with a different strategy. In order to
|
||||
// do that, we need a copy of the original assigner for the returns.
|
||||
let mut backup_rets = rets.clone();
|
||||
let backup_rets_for_struct_return = if sig_is_multi_return {
|
||||
Some(rets.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
|
||||
if new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
if sig.is_multi_return()
|
||||
&& new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
{
|
||||
// The return values couldn't all fit into available return
|
||||
// registers. Introduce the use of a struct-return parameter.
|
||||
|
@ -279,7 +276,6 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match args.assign(&ret_ptr_param) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -289,6 +285,8 @@ pub fn legalize_signature(
|
|||
_ => unreachable!("return pointer should always get a register assignment"),
|
||||
}
|
||||
|
||||
let mut backup_rets = backup_rets_for_struct_return.unwrap();
|
||||
|
||||
// We're using the first return register for the return pointer (like
|
||||
// sys v does).
|
||||
let mut ret_ptr_return = AbiParam {
|
||||
|
@ -296,7 +294,6 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match backup_rets.assign(&ret_ptr_return) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -504,7 +501,7 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
|
|||
|
||||
let word_size = StackSize::from(isa.pointer_bytes());
|
||||
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
WIN_SHADOW_STACK_SPACE as u32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
@ -528,60 +525,50 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
|
||||
}
|
||||
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers
|
||||
// return address + frame pointer + all callee-saved registers + shadow space
|
||||
//
|
||||
// Pushing the return address is an implicit function of the `call`
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
|
||||
let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
|
||||
let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
|
||||
let word_size = isa.pointer_bytes() as usize;
|
||||
let num_fprs = csrs.iter(FPR).len();
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
|
||||
|
||||
// FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
|
||||
if fpsr_stack_size > 0 {
|
||||
csr_stack_size = (csr_stack_size + 15) & !15;
|
||||
}
|
||||
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size,
|
||||
offset: Some(-(csr_stack_size as StackOffset)),
|
||||
});
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
|
||||
// If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
|
||||
if !is_leaf {
|
||||
// TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
// Only create an FPR stack slot if we're going to save FPRs.
|
||||
let fpr_slot = if num_fprs > 0 {
|
||||
// Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it
|
||||
// seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be
|
||||
// through `stack_load` and `stack_store` (see later comment about issue #1198). Even
|
||||
// though in a certain light FPR preserve/restore is "spilling" an argument, regalloc
|
||||
// implies that `SpillSlot` may be eligible for certain optimizations, and we know with
|
||||
// certainty that this space may not be reused in the function, nor moved around.
|
||||
Some(func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::ExplicitSlot,
|
||||
size: WIN_SHADOW_STACK_SPACE,
|
||||
size: (num_fprs * types::F64X2.bytes() as usize) as u32,
|
||||
offset: None,
|
||||
});
|
||||
}
|
||||
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||
|
||||
// Subtract the GPR saved register size from the local size because pushes are used for the saves
|
||||
let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = isa.pointer_type();
|
||||
let sp_arg_index = if fpsr_stack_size > 0 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
|
||||
// since cranelift does not support spill slots before incoming args
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)),
|
||||
});
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = isa.pointer_type();
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -614,13 +601,19 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
sp_arg_index.is_some(),
|
||||
fpr_slot.as_ref(),
|
||||
isa,
|
||||
);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
insert_common_epilogues(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
fpr_slot.as_ref(),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -656,20 +649,6 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
|
||||
// On X86-32 all parameters, including vmctx, are passed on stack, and we need
|
||||
// to extract vmctx from the stack before we can save the frame pointer.
|
||||
let sp_arg_index = if isa.pointer_bits() == 32 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -687,18 +666,11 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
// Set up the cursor and insert the prologue
|
||||
let entry_block = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
|
||||
insert_common_prologue(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
sp_arg_index.is_some(),
|
||||
isa,
|
||||
);
|
||||
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, None);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -710,18 +682,9 @@ fn insert_common_prologue(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
has_sp_param: bool,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let sp = if has_sp_param {
|
||||
let block = pos.current_block().expect("missing block under cursor");
|
||||
let sp = pos.func.dfg.append_block_param(block, reg_type);
|
||||
pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
|
||||
Some(sp)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// If this is a leaf function with zero stack, then there's no need to
|
||||
// insert a stack check since it can't overflow anything and
|
||||
// forward-progress is guaranteed so long as loops are handled anyway.
|
||||
|
@ -744,7 +707,7 @@ fn insert_common_prologue(
|
|||
None => pos
|
||||
.func
|
||||
.stack_limit
|
||||
.map(|gv| interpret_gv(pos, gv, sp, scratch)),
|
||||
.map(|gv| interpret_gv(pos, gv, scratch)),
|
||||
};
|
||||
if let Some(stack_limit_arg) = stack_limit_arg {
|
||||
insert_stack_check(pos, stack_size, stack_limit_arg);
|
||||
|
@ -817,27 +780,38 @@ fn insert_common_prologue(
|
|||
}
|
||||
}
|
||||
|
||||
// With the stack pointer adjusted, save any callee-saved floating point registers via offset
|
||||
// FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
|
||||
// Now that RSP is prepared for the function, we can use stack slots:
|
||||
let mut last_fpr_save = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Append param to entry block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
// `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function entry.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
let mut fpr_offset = 0;
|
||||
|
||||
last_fpr_save = Some(pos.ins().store(
|
||||
ir::MemFlags::trusted(),
|
||||
csr_arg,
|
||||
sp.expect("FPR save requires SP param"),
|
||||
(stack_size - offset) as i32,
|
||||
));
|
||||
for reg in csrs.iter(FPR) {
|
||||
// Append param to entry Block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
last_fpr_save =
|
||||
Some(
|
||||
pos.ins()
|
||||
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset),
|
||||
);
|
||||
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
}
|
||||
}
|
||||
|
||||
pos.func.prologue_end = Some(
|
||||
|
@ -860,55 +834,19 @@ fn insert_common_prologue(
|
|||
/// compared to the stack pointer, but currently it serves enough functionality
|
||||
/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
|
||||
/// bit over time!
|
||||
fn interpret_gv(
|
||||
pos: &mut EncCursor,
|
||||
gv: ir::GlobalValue,
|
||||
sp: Option<ir::Value>,
|
||||
scratch: ir::ValueLoc,
|
||||
) -> ir::Value {
|
||||
fn interpret_gv(pos: &mut EncCursor, gv: ir::GlobalValue, scratch: ir::ValueLoc) -> ir::Value {
|
||||
match pos.func.global_values[gv] {
|
||||
ir::GlobalValueData::VMContext => {
|
||||
let vmctx_index = pos
|
||||
.func
|
||||
.signature
|
||||
.special_param_index(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found");
|
||||
match pos.func.signature.params[vmctx_index] {
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Reg(_),
|
||||
..
|
||||
} => {
|
||||
let entry = pos.func.layout.entry_block().unwrap();
|
||||
pos.func.dfg.block_params(entry)[vmctx_index]
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Stack(offset),
|
||||
value_type,
|
||||
..
|
||||
} => {
|
||||
let offset =
|
||||
offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
|
||||
// The following access can be marked `trusted` because it is a load of an argument. We
|
||||
// know it is safe because it was safe to write it in preparing this function call.
|
||||
let ret =
|
||||
pos.ins()
|
||||
.load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
|
||||
pos.func.locations[ret] = scratch;
|
||||
return ret;
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Unassigned,
|
||||
..
|
||||
} => unreachable!(),
|
||||
}
|
||||
}
|
||||
ir::GlobalValueData::VMContext => pos
|
||||
.func
|
||||
.special_param(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found"),
|
||||
ir::GlobalValueData::Load {
|
||||
base,
|
||||
offset,
|
||||
global_type,
|
||||
readonly: _,
|
||||
} => {
|
||||
let base = interpret_gv(pos, base, sp, scratch);
|
||||
let base = interpret_gv(pos, base, scratch);
|
||||
let ret = pos
|
||||
.ins()
|
||||
.load(global_type, ir::MemFlags::trusted(), base, offset);
|
||||
|
@ -973,13 +911,13 @@ fn insert_common_epilogues(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
sp_arg_index: Option<usize>,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
) {
|
||||
while let Some(block) = pos.next_block() {
|
||||
pos.goto_last_inst(block);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, sp_arg_index);
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, fpr_slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -993,8 +931,56 @@ fn insert_common_epilogue(
|
|||
pos: &mut EncCursor,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
sp_arg_index: Option<usize>,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
) {
|
||||
// Even though instructions to restore FPRs are inserted first, we have to append them after
|
||||
// restored GPRs to satisfy parameter order in the return.
|
||||
let mut restored_fpr_values = Vec::new();
|
||||
|
||||
// Restore FPRs before we move RSP and invalidate stack slots.
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
// `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
//
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function exit.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
|
||||
let mut fpr_offset = 0;
|
||||
|
||||
for reg in csrs.iter(FPR) {
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
stack_addr,
|
||||
fpr_offset,
|
||||
);
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
|
||||
// Unlike GPRs before, we don't need to step back after each restoration because FPR
|
||||
// restoration is order-insensitive. Furthermore: we want GPR restoration to begin
|
||||
// after FPR restoration, so that stack adjustments occur after we're done relying on
|
||||
// StackSlot validity.
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
restored_fpr_values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
sp_adjust_inst = Some(pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)));
|
||||
}
|
||||
|
||||
// Insert the pop of the frame pointer
|
||||
let fp_pop = pos.ins().x86_pop(reg_type);
|
||||
let fp_pop_inst = pos.prev_inst().unwrap();
|
||||
|
@ -1005,47 +991,13 @@ fn insert_common_epilogue(
|
|||
let mut first_csr_pop_inst = None;
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_pop = pos.ins().x86_pop(reg_type);
|
||||
first_csr_pop_inst = pos.prev_inst();
|
||||
assert!(first_csr_pop_inst.is_some());
|
||||
first_csr_pop_inst = Some(pos.prev_inst().unwrap());
|
||||
pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_pop);
|
||||
}
|
||||
|
||||
// Insert the adjustment of SP
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
|
||||
sp_adjust_inst = pos.prev_inst();
|
||||
assert!(sp_adjust_inst.is_some());
|
||||
}
|
||||
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(index) = sp_arg_index {
|
||||
let sp = pos
|
||||
.func
|
||||
.dfg
|
||||
.block_params(pos.func.layout.entry_block().unwrap())[index];
|
||||
|
||||
// Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
sp,
|
||||
(stack_size - offset) as i32,
|
||||
);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
}
|
||||
} else {
|
||||
assert!(csrs.iter(FPR).len() == 0);
|
||||
for value in restored_fpr_values.into_iter() {
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
}
|
||||
|
||||
pos.func.epilogues_start.push(
|
||||
|
|
|
@ -13,7 +13,6 @@ use crate::isa::encoding::base_size;
|
|||
use crate::isa::encoding::{Encoding, RecipeSizing};
|
||||
use crate::isa::RegUnit;
|
||||
use crate::isa::{self, TargetIsa};
|
||||
use crate::legalizer::expand_as_libcall;
|
||||
use crate::predicates;
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
|
@ -247,20 +246,6 @@ fn size_with_inferred_rex_for_inreg0_inreg1(
|
|||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
|
||||
fn size_with_inferred_rex_for_inreg1_inreg2(
|
||||
sizing: &RecipeSizing,
|
||||
_enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
// No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
|
||||
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|
||||
|| test_input(2, inst, divert, func, is_extended_reg);
|
||||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on a single
|
||||
/// input register and a single output register.
|
||||
fn size_with_inferred_rex_for_inreg0_outreg0(
|
||||
|
@ -1196,10 +1181,10 @@ fn convert_extractlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::BinaryImm8 {
|
||||
if let ir::InstructionData::ExtractLane {
|
||||
opcode: ir::Opcode::Extractlane,
|
||||
arg,
|
||||
imm: lane,
|
||||
lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
// NOTE: the following legalization assumes that the upper bits of the XMM register do
|
||||
|
@ -1252,10 +1237,10 @@ fn convert_insertlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::TernaryImm8 {
|
||||
if let ir::InstructionData::InsertLane {
|
||||
opcode: ir::Opcode::Insertlane,
|
||||
args: [vector, replacement],
|
||||
imm: lane,
|
||||
lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let value_type = pos.func.dfg.value_type(vector);
|
||||
|
@ -1270,7 +1255,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_insertps(vector, replacement, immediate)
|
||||
.x86_insertps(vector, immediate, replacement)
|
||||
}
|
||||
F64X2 => {
|
||||
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
|
||||
|
@ -1298,7 +1283,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_pinsr(vector, replacement, lane);
|
||||
.x86_pinsr(vector, lane, replacement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1333,39 +1318,6 @@ fn convert_ineg(
|
|||
}
|
||||
}
|
||||
|
||||
fn expand_dword_to_xmm<'f>(
|
||||
pos: &mut FuncCursor<'_>,
|
||||
arg: ir::Value,
|
||||
arg_type: ir::Type,
|
||||
) -> ir::Value {
|
||||
if arg_type == I64 {
|
||||
let (arg_lo, arg_hi) = pos.ins().isplit(arg);
|
||||
let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
|
||||
let arg = pos.ins().insertlane(arg, arg_hi, 1);
|
||||
let arg = pos.ins().raw_bitcast(I64X2, arg);
|
||||
arg
|
||||
} else {
|
||||
pos.ins().bitcast(I64X2, arg)
|
||||
}
|
||||
}
|
||||
|
||||
fn contract_dword_from_xmm<'f>(
|
||||
pos: &mut FuncCursor<'f>,
|
||||
inst: ir::Inst,
|
||||
ret: ir::Value,
|
||||
ret_type: ir::Type,
|
||||
) {
|
||||
if ret_type == I64 {
|
||||
let ret = pos.ins().raw_bitcast(I32X4, ret);
|
||||
let ret_lo = pos.ins().extractlane(ret, 0);
|
||||
let ret_hi = pos.ins().extractlane(ret, 1);
|
||||
pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
|
||||
} else {
|
||||
let ret = pos.ins().extractlane(ret, 0);
|
||||
pos.func.dfg.replace(inst).ireduce(ret_type, ret);
|
||||
}
|
||||
}
|
||||
|
||||
// Masks for i8x16 unsigned right shift.
|
||||
static USHR_MASKS: [u8; 128] = [
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||||
|
@ -1427,24 +1379,7 @@ fn convert_ushr(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psrl(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
@ -1511,76 +1446,12 @@ fn convert_ishl(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psll(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
|
||||
fn convert_i64x2_imul(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Imul,
|
||||
args: [arg0, arg1],
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty == I64X2 {
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
|
||||
// If we have certain AVX512 features, we can lower this instruction simply.
|
||||
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
|
||||
} else {
|
||||
// Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
|
||||
// 64-bit lane into 32-bit high and low sections using shifting and then performs
|
||||
// the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
|
||||
// concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1).
|
||||
let high0 = pos.ins().ushr_imm(arg0, 32);
|
||||
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
|
||||
let high1 = pos.ins().ushr_imm(arg1, 32);
|
||||
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
|
||||
let addhigh = pos.ins().iadd(mul0, mul1);
|
||||
let high = pos.ins().ishl_imm(addhigh, 32);
|
||||
let low = pos.ins().x86_pmuludq(arg0, arg1);
|
||||
pos.func.dfg.replace(inst).iadd(low, high);
|
||||
}
|
||||
} else {
|
||||
unreachable!(
|
||||
"{} should be encodable; it cannot be legalized by convert_i64x2_imul",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn expand_tls_value(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
|
|
|
@ -23,7 +23,6 @@ use crate::result::CodegenResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -54,23 +53,12 @@ fn isa_constructor(
|
|||
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
|
||||
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
|
||||
};
|
||||
|
||||
let isa_flags = settings::Flags::new(&shared_flags, builder);
|
||||
|
||||
if isa_flags.use_new_backend() {
|
||||
#[cfg(not(feature = "x64"))]
|
||||
panic!("new backend x86 support not included by cargo features!");
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
super::x64::isa_builder(triple).finish(shared_flags)
|
||||
} else {
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags,
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
|
@ -185,10 +173,6 @@ impl TargetIsa for Isa {
|
|||
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
|
||||
Some(unwind::systemv::create_cie())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -28,7 +28,22 @@ pub(crate) fn create_unwind_info(
|
|||
let mut prologue_size = 0;
|
||||
let mut unwind_codes = Vec::new();
|
||||
let mut found_end = false;
|
||||
let mut xmm_save_count: u8 = 0;
|
||||
|
||||
// Have we saved at least one FPR? if so, we might have to check additional constraints.
|
||||
let mut saved_fpr = false;
|
||||
|
||||
// In addition to the min offset for a callee-save, we need to know the offset from the
|
||||
// frame base to the stack pointer, so that we can record an unwind offset that spans only
|
||||
// to the end of callee-save space.
|
||||
let mut static_frame_allocation_size = 0u32;
|
||||
|
||||
// For the time being, FPR preservation is split into a stack_addr and later store/load.
|
||||
// Store the register used for stack store and ensure it is the same register with no
|
||||
// intervening changes to the frame size.
|
||||
let mut callee_save_region_reg = None;
|
||||
// Also record the callee-save region's offset from RSP, because it must be added to FPR
|
||||
// save offsets to compute an offset from the frame base.
|
||||
let mut callee_save_offset = None;
|
||||
|
||||
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
|
||||
// x64 ABI prologues cannot exceed 255 bytes in length
|
||||
|
@ -45,6 +60,8 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::Unary { opcode, arg } => {
|
||||
match opcode {
|
||||
Opcode::X86Push => {
|
||||
static_frame_allocation_size += 8;
|
||||
|
||||
unwind_codes.push(UnwindCode::PushRegister {
|
||||
offset: unwind_offset,
|
||||
reg: GPR.index_of(func.locations[arg].unwrap_reg()) as u8,
|
||||
|
@ -53,6 +70,7 @@ pub(crate) fn create_unwind_info(
|
|||
Opcode::AdjustSpDown => {
|
||||
let stack_size =
|
||||
stack_size.expect("expected a previous stack size instruction");
|
||||
static_frame_allocation_size += stack_size;
|
||||
|
||||
// This is used when calling a stack check function
|
||||
// We need to track the assignment to RAX which has the size of the stack
|
||||
|
@ -67,6 +85,10 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::CopySpecial { src, dst, .. } => {
|
||||
if let Some(frame_register) = frame_register {
|
||||
if src == (RU::rsp as RegUnit) && dst == frame_register {
|
||||
// Constructing an rbp-based stack frame, so the static frame
|
||||
// allocation restarts at 0 from here.
|
||||
static_frame_allocation_size = 0;
|
||||
|
||||
unwind_codes.push(UnwindCode::SetFramePointer {
|
||||
offset: unwind_offset,
|
||||
sp_offset: 0,
|
||||
|
@ -91,7 +113,7 @@ pub(crate) fn create_unwind_info(
|
|||
let imm: i64 = imm.into();
|
||||
assert!(imm <= core::u32::MAX as i64);
|
||||
|
||||
stack_size = Some(imm as u32);
|
||||
static_frame_allocation_size += imm as u32;
|
||||
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
offset: unwind_offset,
|
||||
|
@ -101,27 +123,52 @@ pub(crate) fn create_unwind_info(
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
InstructionData::StackLoad {
|
||||
opcode: Opcode::StackAddr,
|
||||
stack_slot,
|
||||
offset: _,
|
||||
} => {
|
||||
let result = func.dfg.inst_results(inst).get(0).unwrap();
|
||||
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
|
||||
callee_save_region_reg = Some(frame_reg);
|
||||
|
||||
// Figure out the offset in the call frame that `frame_reg` will have.
|
||||
let frame_size = func
|
||||
.stack_slots
|
||||
.layout_info
|
||||
.expect("func's stack slots have layout info if stack operations exist")
|
||||
.frame_size;
|
||||
// Because we're well after the prologue has been constructed, stack slots
|
||||
// must have been laid out...
|
||||
let slot_offset = func.stack_slots[stack_slot]
|
||||
.offset
|
||||
.expect("callee-save slot has an offset computed");
|
||||
let frame_offset = frame_size as i32 + slot_offset;
|
||||
|
||||
callee_save_offset = Some(frame_offset as u32);
|
||||
}
|
||||
}
|
||||
InstructionData::Store {
|
||||
opcode: Opcode::Store,
|
||||
args: [arg1, arg2],
|
||||
flags: _flags,
|
||||
offset,
|
||||
..
|
||||
} => {
|
||||
if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
|
||||
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
|
||||
(func.locations[arg1], func.locations[arg2])
|
||||
{
|
||||
// If this is a save of an FPR, record an unwind operation
|
||||
// Note: the stack_offset here is relative to an adjusted SP
|
||||
// This will be fixed up later to be based on the frame pointer offset
|
||||
if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
|
||||
let offset: i32 = offset.into();
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: src as u8,
|
||||
stack_offset: offset as u32,
|
||||
});
|
||||
|
||||
xmm_save_count += 1;
|
||||
if Some(base_ru) == callee_save_region_reg {
|
||||
let offset_int: i32 = offset.into();
|
||||
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
|
||||
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR preservation requires an FPR save region, which has some stack offset");
|
||||
if FPR.contains(ru) {
|
||||
saved_fpr = true;
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: ru as u8,
|
||||
stack_offset: offset_int,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -136,45 +183,41 @@ pub(crate) fn create_unwind_info(
|
|||
|
||||
assert!(found_end);
|
||||
|
||||
// When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
|
||||
// register minus some offset, forming a "base address". This attempts to calculate the frame register offset
|
||||
// while updating the XMM save offsets to be relative from this "base address" rather than RSP.
|
||||
let mut frame_register_offset = 0;
|
||||
if frame_register.is_some() && xmm_save_count > 0 {
|
||||
// Determine the number of 16-byte slots used for all CSRs (including GPRs)
|
||||
// The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
|
||||
// Assumption: each FPR is stored at a lower address than the previous one
|
||||
let mut last_stack_offset = None;
|
||||
let mut fpr_save_count: u8 = 0;
|
||||
let mut gpr_push_count: u8 = 0;
|
||||
for code in unwind_codes.iter_mut() {
|
||||
match code {
|
||||
UnwindCode::SaveXmm { stack_offset, .. } => {
|
||||
if let Some(last) = last_stack_offset {
|
||||
assert!(last > *stack_offset);
|
||||
}
|
||||
last_stack_offset = Some(*stack_offset);
|
||||
fpr_save_count += 1;
|
||||
*stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
|
||||
}
|
||||
UnwindCode::PushRegister { .. } => {
|
||||
gpr_push_count += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if saved_fpr {
|
||||
if static_frame_allocation_size > 240 && saved_fpr {
|
||||
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
|
||||
This is a Cranelift implementation limit, see \
|
||||
https://github.com/bytecodealliance/wasmtime/issues/1475",
|
||||
static_frame_allocation_size);
|
||||
return Err(CodegenError::ImplLimitExceeded);
|
||||
}
|
||||
assert_eq!(fpr_save_count, xmm_save_count);
|
||||
|
||||
// Account for alignment space when there's an odd number of GPR pushes
|
||||
// Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
|
||||
frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
|
||||
// Only check that the static frame size is 16-byte aligned when an FPR is saved, to avoid
|
||||
// panicking when alignment is elided because no FPRs are saved and no child calls are
|
||||
// made.
|
||||
assert!(
|
||||
static_frame_allocation_size % 16 == 0,
|
||||
"static frame allocation must be a multiple of 16"
|
||||
);
|
||||
}
|
||||
|
||||
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
|
||||
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
|
||||
// Windows x64 SEH only allows this offset to be up to 240 bytes, however, meaning large
|
||||
// frames are inexpressible, and we cannot actually compile the function. In case there are
|
||||
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
|
||||
// will actually check it. This, then, avoids panics when compiling functions with large
|
||||
// call frames.
|
||||
let reported_frame_offset = if saved_fpr {
|
||||
(static_frame_allocation_size / 16) as u8
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Ok(Some(UnwindInfo {
|
||||
flags: 0, // this assumes cranelift functions have no SEH handlers
|
||||
prologue_size: prologue_size as u8,
|
||||
frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
|
||||
frame_register_offset,
|
||||
frame_register_offset: reported_frame_offset,
|
||||
unwind_codes,
|
||||
}))
|
||||
}
|
||||
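As a worked example of the slot arithmetic above (hypothetical counts, not taken from the patch): with 3 FPR saves and 5 GPR pushes, the frame-register offset is 3 + (5 + 1) / 2 = 6 sixteen-byte slots, i.e. 96 bytes, comfortably below the 240-byte SEH limit mentioned in the comments.

// Worked example of the slot arithmetic used for frame_register_offset above
// (hypothetical counts; each FPR takes one 16-byte slot, two 8-byte GPR pushes
// share one slot, rounded up).
fn frame_register_offset_slots(fpr_save_count: u8, gpr_push_count: u8) -> u8 {
    fpr_save_count + ((gpr_push_count + 1) / 2)
}

fn main() {
    let slots = frame_register_offset_slots(3, 5);
    assert_eq!(slots, 6);               // 6 sixteen-byte slots...
    assert_eq!(slots as u32 * 16, 96);  // ...= 96 bytes, under the 240-byte SEH limit.
}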
|
@ -241,7 +284,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 9,
|
||||
size: 64
|
||||
size: 64 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -260,7 +303,7 @@ mod tests {
|
|||
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x09, // Prolog offset
|
||||
0x72, // Operation 2 (small stack alloc), size = 0x7 slots (e.g. (0x7 * 8) + 8 = 64 bytes)
|
||||
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
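The expected bytes in this test follow the Windows x64 small-allocation unwind encoding referenced in the comments: the low nibble is the operation (2 = small stack alloc) and the high nibble is `info`, with size = info * 8 + 8. A quick standalone check of that packing (a sketch, not the codegen under test):

// Pack a UWOP_ALLOC_SMALL byte: low nibble = operation 2, high nibble = info,
// where the allocation size is info * 8 + 8 (valid for 8..=128 bytes).
fn small_alloc_unwind_byte(size_bytes: u32) -> u8 {
    assert!(size_bytes >= 8 && size_bytes <= 128 && size_bytes % 8 == 0);
    let info = (size_bytes / 8 - 1) as u8;
    (info << 4) | 0x2
}

fn main() {
    assert_eq!(small_alloc_unwind_byte(64), 0x72); // the old expected byte
    assert_eq!(small_alloc_unwind_byte(96), 0xB2); // the new expected byte (64 + 32)
}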
|
@ -306,7 +349,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 10000
|
||||
size: 10000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -326,8 +369,8 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
|
||||
0xE2, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
|
||||
0xE6, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E6 * 8 = 10032 (10000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
@ -371,7 +414,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 1000000
|
||||
size: 1000000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -391,10 +434,10 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
|
||||
0x40, // Byte 1 of size
|
||||
0x60, // Byte 1 of size
|
||||
0x42, // Byte 2 of size
|
||||
0x0F, // Byte 3 of size
|
||||
0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
|
||||
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
|
|
@ -504,13 +504,6 @@ where
|
|||
// this value.
|
||||
pos.ins().with_results([into_result]).ireduce(ty, arg)
|
||||
}
|
||||
// ABI argument is a pointer to the value we want.
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
|
||||
pos.ins()
|
||||
.with_results([into_result])
|
||||
.load(ty, MemFlags::new(), arg, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -570,18 +563,6 @@ fn convert_to_abi<PutArg>(
|
|||
let arg = pos.ins().uextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
// Note: This conversion can only happen for call arguments,
|
||||
// so we can allocate the value on stack safely.
|
||||
let stack_slot = pos.func.create_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: ty.bytes(),
|
||||
offset: None,
|
||||
});
|
||||
let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0);
|
||||
pos.ins().store(MemFlags::new(), value, arg, 0);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -776,6 +757,12 @@ pub fn handle_call_abi(
|
|||
{
|
||||
legalize_sret_call(isa, pos, sig_ref, inst);
|
||||
} else {
|
||||
// OK, we need to fix the call arguments to match the ABI signature.
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
|
||||
inst = legalize_inst_results(pos, |func, abi_res| {
|
||||
func.dfg.signatures[sig_ref].returns[abi_res]
|
||||
|
@ -783,13 +770,6 @@ pub fn handle_call_abi(
|
|||
}
|
||||
}
|
||||
|
||||
// Go back and fix the call arguments to match the ABI signature.
|
||||
pos.goto_inst(inst);
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
debug_assert!(
|
||||
check_call_signature(&pos.func.dfg, inst).is_ok(),
|
||||
"Signature still wrong: {}, {}{}",
|
||||
|
@ -834,12 +814,7 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
|
|||
pos.use_srcloc(inst);
|
||||
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
let arg = func.signature.returns[abi_arg];
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"Return value cannot be legalized to pointer"
|
||||
);
|
||||
arg
|
||||
func.signature.returns[abi_arg]
|
||||
});
|
||||
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
|
||||
// the legalized signature. These values should simply be propagated from the entry block
|
||||
|
|
|
@ -35,7 +35,7 @@ mod table;
|
|||
use self::call::expand_call;
|
||||
use self::globalvalue::expand_global_value;
|
||||
use self::heap::expand_heap_addr;
|
||||
pub(crate) use self::libcall::expand_as_libcall;
|
||||
use self::libcall::expand_as_libcall;
|
||||
use self::table::expand_table_addr;
|
||||
|
||||
enum LegalizeInstResult {
|
||||
|
|
|
@ -99,12 +99,12 @@ mod iterators;
|
|||
mod legalizer;
|
||||
mod licm;
|
||||
mod nan_canonicalization;
|
||||
mod num_uses;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod redundant_reload_remover;
|
||||
mod regalloc;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
|
@ -114,9 +114,6 @@ mod topo_order;
|
|||
mod unreachable_code;
|
||||
mod value_label;
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod peepmatic;
|
||||
|
||||
pub use crate::result::{CodegenError, CodegenResult};
|
||||
|
||||
/// Version number of this crate.
|
||||
|
|
|
@ -12,15 +12,6 @@ pub trait ABIBody {
|
|||
/// The instruction type for the ISA associated with this ABI.
|
||||
type I: VCodeInst;
|
||||
|
||||
/// Does the ABI-body code need a temp reg? One will be provided to `init()`
|
||||
/// as the `maybe_tmp` arg if so.
|
||||
fn temp_needed(&self) -> bool;
|
||||
|
||||
/// Initialize. This is called after the ABIBody is constructed because it
|
||||
/// may be provided with a temp vreg, which can only be allocated once the
|
||||
/// lowering context exists.
|
||||
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
|
||||
|
||||
/// Get the settings controlling this function's compilation.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
|
@ -43,13 +34,6 @@ pub trait ABIBody {
|
|||
/// register.
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Generate any setup instruction needed to save values to the
|
||||
/// return-value area. This is usually used when there are multiple return
|
||||
/// values or an otherwise large return value that must be passed on the
|
||||
/// stack; typically the ABI specifies an extra hidden argument that is a
|
||||
/// pointer to that memory.
|
||||
fn gen_retval_area_setup(&self) -> Option<Self::I>;
|
||||
|
||||
/// Generate an instruction which copies a source register to a return value slot.
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
|
@ -114,10 +98,7 @@ pub trait ABIBody {
|
|||
fn gen_epilogue(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Returns the full frame size for the given function, after prologue emission has run. This
|
||||
/// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
|
||||
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
|
||||
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
|
||||
/// Baldrdash (SpiderMonkey integration).
|
||||
/// comprises the spill space, incoming argument space, alignment padding, etc.
|
||||
fn frame_size(&self) -> u32;
|
||||
|
||||
/// Get the spill-slot size.
|
||||
|
@ -151,29 +132,24 @@ pub trait ABICall {
|
|||
/// Get the number of arguments expected.
|
||||
fn num_args(&self) -> usize;
|
||||
|
||||
/// Emit a copy of an argument value from a source register, prior to the call.
|
||||
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
/// Copy an argument value from a source register, prior to the call.
|
||||
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
from_reg: Reg,
|
||||
);
|
||||
) -> Vec<Self::I>;
|
||||
|
||||
/// Emit a copy of a return value into a destination register, after the call returns.
|
||||
fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
into_reg: Writable<Reg>,
|
||||
);
|
||||
/// Copy a return value into a destination register, after the call returns.
|
||||
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Emit code to pre-adjust the stack, prior to argument copies and call.
|
||||
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
/// Pre-adjust the stack, prior to argument copies and call.
|
||||
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Emit code to post-adjust the stack, after call return and return-value copies.
|
||||
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
/// Post-adjust the stack, after call return and return-value copies.
|
||||
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Emit the call itself.
|
||||
/// Generate the call itself.
|
||||
///
|
||||
/// The returned instruction should have proper use- and def-sets according
|
||||
/// to the argument registers, return-value registers, and clobbered
|
||||
|
@ -183,8 +159,5 @@ pub trait ABICall {
|
|||
/// registers are also logically defs, but should never be read; their
|
||||
/// values are "defined" (to the regalloc) but "undefined" in every other
|
||||
/// sense.)
|
||||
///
|
||||
/// This function should only be called once, as it is allowed to re-use
|
||||
/// parts of the ABICall object in emitting instructions.
|
||||
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
|
||||
fn gen_call(&self) -> Vec<Self::I>;
|
||||
}
|
||||
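Taken together, the reshaped `ABICall` methods imply a call-emission sequence of stack pre-adjust, argument copies, the call itself, return-value copies, and stack post-adjust. The sketch below illustrates that ordering with simplified stand-in types; `MiniAbiCall`, the string "instructions", and `MockCall` are hypothetical, not Cranelift's actual API:

// Simplified stand-in for the ABICall surface above; instructions are strings.
trait MiniAbiCall {
    fn num_args(&self) -> usize;
    fn gen_stack_pre_adjust(&self) -> Vec<String>;
    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: u8) -> Vec<String>;
    fn gen_call(&self) -> Vec<String>;
    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: u8) -> String;
    fn gen_stack_post_adjust(&self) -> Vec<String>;
}

/// Emit a full call sequence in the order the trait documentation implies:
/// pre-adjust SP, copy arguments, call, copy return values, post-adjust SP.
fn emit_call_sequence<A: MiniAbiCall>(abi: &A, arg_regs: &[u8], ret_regs: &[u8]) -> Vec<String> {
    let mut insts = abi.gen_stack_pre_adjust();
    for (idx, &reg) in arg_regs.iter().enumerate().take(abi.num_args()) {
        insts.extend(abi.gen_copy_reg_to_arg(idx, reg));
    }
    insts.extend(abi.gen_call());
    for (idx, &reg) in ret_regs.iter().enumerate() {
        insts.push(abi.gen_copy_retval_to_reg(idx, reg));
    }
    insts.extend(abi.gen_stack_post_adjust());
    insts
}

struct MockCall { args: usize }

impl MiniAbiCall for MockCall {
    fn num_args(&self) -> usize { self.args }
    fn gen_stack_pre_adjust(&self) -> Vec<String> { vec!["sub rsp, 32".into()] }
    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: u8) -> Vec<String> {
        vec![format!("mov arg{}, r{}", idx, from_reg)]
    }
    fn gen_call(&self) -> Vec<String> { vec!["call f".into()] }
    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: u8) -> String {
        format!("mov r{}, ret{}", into_reg, idx)
    }
    fn gen_stack_post_adjust(&self) -> Vec<String> { vec!["add rsp, 32".into()] }
}

fn main() {
    let insts = emit_call_sequence(&MockCall { args: 2 }, &[3, 4], &[0]);
    assert_eq!(insts.first().unwrap(), "sub rsp, 32");
    assert_eq!(insts.last().unwrap(), "add rsp, 32");
}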
|
|
|
@ -10,7 +10,6 @@ use crate::settings::Flags;
|
|||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
use core::any::Any;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use target_lexicon::Triple;
|
||||
|
@ -128,8 +127,4 @@ impl TargetIsa for TargetIsaAdapter {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
self.backend.unsigned_sub_overflow_condition()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,624 +1,59 @@
|
|||
//! Computation of basic block order in emitted code.
|
||||
//!
|
||||
//! This module handles the translation from CLIF BBs to VCode BBs.
|
||||
//!
|
||||
//! The basic idea is that we compute a sequence of "lowered blocks" that
|
||||
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
|
||||
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
|
||||
//! moves for phi-nodes on every block-to-block transfer; these blocks always
|
||||
//! conceptually exist, but may be merged with an "original" CLIF block (and
|
||||
//! hence not actually exist; this is equivalent to inserting the blocks only on
|
||||
//! critical edges).
|
||||
//!
|
||||
//! In other words, starting from a CFG like this (where each "CLIF block" and
|
||||
//! "(edge N->M)" is a separate basic block):
|
||||
//!
|
||||
//! ```plain
|
||||
//!
|
||||
//! CLIF block 0
|
||||
//! / \
|
||||
//! (edge 0->1) (edge 0->2)
|
||||
//! | |
|
||||
//! CLIF block 1 CLIF block 2
|
||||
//! \ /
|
||||
//! (edge 1->3) (edge 2->3)
|
||||
//! \ /
|
||||
//! CLIF block 3
|
||||
//! ```
|
||||
//!
|
||||
//! We can produce a CFG of lowered blocks like so:
|
||||
//!
|
||||
//! ```plain
|
||||
//! +--------------+
|
||||
//! | CLIF block 0 |
|
||||
//! +--------------+
|
||||
//! / \
|
||||
//! +--------------+ +--------------+
|
||||
//! | (edge 0->1) | |(edge 0->2) |
|
||||
//! | CLIF block 1 | | CLIF block 2 |
|
||||
//! +--------------+ +--------------+
|
||||
//! \ /
|
||||
//! +-----------+ +-----------+
|
||||
//! |(edge 1->3)| |(edge 2->3)|
|
||||
//! +-----------+ +-----------+
|
||||
//! \ /
|
||||
//! +------------+
|
||||
//! |CLIF block 3|
|
||||
//! +------------+
|
||||
//! ```
|
||||
//!
|
||||
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
|
||||
//! blocks' original bodies, but the out-edges could not because for simplicity
|
||||
//! in the successor-function definition, we only ever merge an edge onto one
|
||||
//! side of an original CLIF block.)
|
||||
//!
|
||||
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
|
||||
//! block prepended or appended with an edge block (never both, though), or just
|
||||
//! an edge block.
|
||||
//!
|
||||
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
|
||||
//! (never actually materialized, just defined by a "successors" function), and
|
||||
//! compute the reverse postorder.
|
||||
//!
|
||||
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
|
||||
//! example, consider any information about whether edge blocks will actually
|
||||
//! have content, because this computation happens as part of lowering *before*
|
||||
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
|
||||
//! particular edge. But it works relatively well and is conceptually simple.
|
||||
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
|
||||
//! branch editing that in practice elides empty blocks and simplifies some of
|
||||
//! the other redundancies that this scheme produces.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::ir::{Block, Function, Inst, Opcode};
|
||||
use crate::machinst::lower::visit_block_succs;
|
||||
use crate::machinst::*;
|
||||
use regalloc::{BlockIx, Function};
|
||||
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Mapping from CLIF BBs to VCode BBs.
|
||||
#[derive(Debug)]
|
||||
pub struct BlockLoweringOrder {
|
||||
/// Lowered blocks, in BlockIndex order. Each block is some combination of
|
||||
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
|
||||
/// see [LoweredBlock] for details.
|
||||
lowered_order: Vec<LoweredBlock>,
|
||||
/// Successors for all lowered blocks, in one serialized vector. Indexed by
|
||||
/// the ranges in `lowered_succ_ranges`.
|
||||
lowered_succs: Vec<(Inst, LoweredBlock)>,
|
||||
/// BlockIndex values for successors for all lowered blocks, in the same
|
||||
/// order as `lowered_succs`.
|
||||
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
|
||||
/// Ranges in `lowered_succs` giving the successor lists for each lowered
|
||||
/// block. Indexed by lowering-order index (`BlockIndex`).
|
||||
lowered_succ_ranges: Vec<(usize, usize)>,
|
||||
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
|
||||
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
|
||||
/// blocks.
|
||||
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
||||
/// Simple reverse postorder-based block order emission.
|
||||
///
|
||||
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
|
||||
/// construction algorithm.
|
||||
struct BlockRPO {
|
||||
visited: Vec<bool>,
|
||||
postorder: Vec<BlockIndex>,
|
||||
deferred_last: Option<BlockIndex>,
|
||||
}
|
||||
|
||||
/// The origin of a block in the lowered block-order: either an original CLIF
|
||||
/// block, or an inserted edge-block, or a combination of the two if an edge is
|
||||
/// non-critical.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum LoweredBlock {
|
||||
/// Block in original CLIF, with no merged edge-blocks.
|
||||
Orig {
|
||||
/// Original CLIF block.
|
||||
block: Block,
|
||||
},
|
||||
/// Block in the original CLIF, plus edge-block to one succ (which is the
|
||||
/// one successor of the original block).
|
||||
OrigAndEdge {
|
||||
/// The original CLIF block contained in this lowered block.
|
||||
block: Block,
|
||||
/// The edge (jump) instruction transitioning from this block
|
||||
/// to the next, i.e., corresponding to the included edge-block. This
|
||||
/// will be an instruction in `block`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
/// Block in the original CLIF, preceded by edge-block from one pred (which
|
||||
/// is the one pred of the original block).
|
||||
EdgeAndOrig {
|
||||
/// The previous CLIF block, i.e., the edge block's predecessor.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to the included
|
||||
/// edge-block. This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The original CLIF block included in this lowered block.
|
||||
block: Block,
|
||||
},
|
||||
/// Split critical edge between two CLIF blocks. This lowered block does not
|
||||
/// correspond to any original CLIF blocks; it only serves as an insertion
|
||||
/// point for work to happen on the transition from `pred` to `succ`.
|
||||
Edge {
|
||||
/// The predecessor CLIF block.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to this edge's transition.
|
||||
/// This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
}
|
||||
|
||||
impl LoweredBlock {
|
||||
/// The associated original (CLIF) block included in this lowered block, if
|
||||
/// any.
|
||||
pub fn orig_block(self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::Orig { block, .. }
|
||||
| LoweredBlock::OrigAndEdge { block, .. }
|
||||
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
|
||||
LoweredBlock::Edge { .. } => None,
|
||||
impl BlockRPO {
|
||||
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
|
||||
BlockRPO {
|
||||
visited: vec![false; vcode.num_blocks()],
|
||||
postorder: vec![],
|
||||
deferred_last: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The associated in-edge, if any.
|
||||
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred,
|
||||
edge_inst,
|
||||
block,
|
||||
} => Some((pred, edge_inst, block)),
|
||||
_ => None,
|
||||
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
|
||||
self.visited[block as usize] = true;
|
||||
for succ in vcode.succs(block) {
|
||||
if !self.visited[*succ as usize] {
|
||||
self.visit(vcode, *succ);
|
||||
}
|
||||
}
|
||||
|
||||
for i in vcode.block_insns(BlockIx::new(block)) {
|
||||
if vcode.get_insn(i).is_epilogue_placeholder() {
|
||||
debug_assert!(self.deferred_last.is_none());
|
||||
self.deferred_last = Some(block);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.postorder.push(block);
|
||||
}
|
||||
|
||||
/// The associated out-edge, if any. Also includes edge-only blocks.
|
||||
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((block, edge_inst, succ)),
|
||||
LoweredBlock::Edge {
|
||||
pred,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((pred, edge_inst, succ)),
|
||||
_ => None,
|
||||
fn rpo(self) -> Vec<BlockIndex> {
|
||||
let mut rpo = self.postorder;
|
||||
rpo.reverse();
|
||||
if let Some(block) = self.deferred_last {
|
||||
rpo.push(block);
|
||||
}
|
||||
rpo
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLoweringOrder {
|
||||
/// Compute and return a lowered block order for `f`.
|
||||
pub fn new(f: &Function) -> BlockLoweringOrder {
|
||||
debug!("BlockLoweringOrder: function body {:?}", f);
|
||||
|
||||
// Step 1: compute the in-edge and out-edge count of every block.
|
||||
let mut block_in_count = SecondaryMap::with_default(0);
|
||||
let mut block_out_count = SecondaryMap::with_default(0);
|
||||
|
||||
// Cache the block successors to avoid re-examining branches below.
|
||||
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
|
||||
let mut block_succ_range = SecondaryMap::with_default((0, 0));
|
||||
let mut fallthrough_return_block = None;
|
||||
for block in f.layout.blocks() {
|
||||
let block_succ_start = block_succs.len();
|
||||
visit_block_succs(f, block, |inst, succ| {
|
||||
block_out_count[block] += 1;
|
||||
block_in_count[succ] += 1;
|
||||
block_succs.push((inst, succ));
|
||||
});
|
||||
let block_succ_end = block_succs.len();
|
||||
block_succ_range[block] = (block_succ_start, block_succ_end);
|
||||
|
||||
for inst in f.layout.block_likely_branches(block) {
|
||||
if f.dfg[inst].opcode() == Opcode::Return {
|
||||
// Implicit output edge for any return.
|
||||
block_out_count[block] += 1;
|
||||
}
|
||||
if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
|
||||
// Fallthrough return block must come last.
|
||||
debug_assert!(fallthrough_return_block == None);
|
||||
fallthrough_return_block = Some(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Implicit input edge for entry block.
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
block_in_count[entry] += 1;
|
||||
}
|
||||
|
||||
// Here we define the implicit CLIF-plus-edges graph. There are
|
||||
// conceptually two such graphs: the original, with every edge explicit,
|
||||
// and the merged one, with blocks (represented by `LoweredBlock`
|
||||
// values) that contain original CLIF blocks, edges, or both. This
|
||||
// function returns a lowered block's successors as per the latter, with
|
||||
// consideration to edge-block merging.
|
||||
//
|
||||
// Note that there is a property of the block-merging rules below
|
||||
// that is very important to ensure we don't miss any lowered blocks:
|
||||
// any block in the implicit CLIF-plus-edges graph will *only* be
|
||||
// included in one block in the merged graph.
|
||||
//
|
||||
// This, combined with the property that every edge block is reachable
|
||||
// only from one predecessor (and hence cannot be reached by a DFS
|
||||
// backedge), means that it is sufficient in our DFS below to track
|
||||
// visited-bits per original CLIF block only, not per edge. This greatly
|
||||
// simplifies the data structures (no need to keep a sparse hash-set of
|
||||
// (block, block) tuples).
|
||||
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
|
||||
let start_idx = ret.len();
|
||||
match block {
|
||||
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
|
||||
// At an orig block; successors are always edge blocks,
|
||||
// possibly with orig blocks following.
|
||||
let range = block_succ_range[block];
|
||||
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
|
||||
if block_in_count[succ] == 1 {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
block: succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::Edge {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
succ,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
LoweredBlock::Edge {
|
||||
succ, edge_inst, ..
|
||||
}
|
||||
| LoweredBlock::OrigAndEdge {
|
||||
succ, edge_inst, ..
|
||||
} => {
|
||||
// At an edge block; successors are always orig blocks,
|
||||
// possibly with edge blocks following.
|
||||
if block_out_count[succ] == 1 {
|
||||
let range = block_succ_range[succ];
|
||||
// check if the one succ is a real CFG edge (vs.
|
||||
// implicit return succ).
|
||||
if range.1 - range.0 > 0 {
|
||||
debug_assert!(range.1 - range.0 == 1);
|
||||
let (succ_edge_inst, succ_succ) = block_succs[range.0];
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block: succ,
|
||||
edge_inst: succ_edge_inst,
|
||||
succ: succ_succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
}
|
||||
}
|
||||
let end_idx = ret.len();
|
||||
(start_idx, end_idx)
|
||||
};
|
||||
|
||||
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
|
||||
let mut lowered_succs = vec![];
|
||||
let mut lowered_succ_indices = vec![];
|
||||
|
||||
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
|
||||
// explicit stack so we don't overflow the real stack with a deep DFS.
|
||||
#[derive(Debug)]
|
||||
struct StackEntry {
|
||||
this: LoweredBlock,
|
||||
succs: (usize, usize), // range in lowered_succs
|
||||
cur_succ: usize, // index in lowered_succs
|
||||
}
|
||||
|
||||
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
|
||||
let mut visited = FxHashSet::default();
|
||||
let mut postorder = vec![];
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
|
||||
// to not special-case the entry block here.
|
||||
let block = LoweredBlock::Orig { block: entry };
|
||||
visited.insert(block);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, block);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: block,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
|
||||
let mut deferred_last = None;
|
||||
while !stack.is_empty() {
|
||||
let stack_entry = stack.last_mut().unwrap();
|
||||
let range = stack_entry.succs;
|
||||
if stack_entry.cur_succ == range.0 {
|
||||
let orig_block = stack_entry.this.orig_block();
|
||||
if orig_block.is_some() && orig_block == fallthrough_return_block {
|
||||
deferred_last = Some((stack_entry.this, range));
|
||||
} else {
|
||||
postorder.push((stack_entry.this, range));
|
||||
}
|
||||
stack.pop();
|
||||
} else {
|
||||
// Heuristic: chase the children in reverse. This puts the first
|
||||
// successor block first in RPO, all other things being equal,
|
||||
// which tends to prioritize loop backedges over out-edges,
|
||||
// putting the edge-block closer to the loop body and minimizing
|
||||
// live-ranges in linear instruction space.
|
||||
let next = lowered_succs[stack_entry.cur_succ - 1].1;
|
||||
stack_entry.cur_succ -= 1;
|
||||
if visited.contains(&next) {
|
||||
continue;
|
||||
}
|
||||
visited.insert(next);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, next);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: next,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
postorder.reverse();
|
||||
let mut rpo = postorder;
|
||||
if let Some(d) = deferred_last {
|
||||
rpo.push(d);
|
||||
}
|
||||
|
||||
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
||||
let mut lowered_order = vec![];
|
||||
let mut lowered_succ_ranges = vec![];
|
||||
let mut lb_to_bindex = FxHashMap::default();
|
||||
for (block, succ_range) in rpo.into_iter() {
|
||||
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
|
||||
lowered_order.push(block);
|
||||
lowered_succ_ranges.push(succ_range);
|
||||
}
|
||||
|
||||
let lowered_succ_indices = lowered_succs
|
||||
.iter()
|
||||
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
|
||||
.collect();
|
||||
|
||||
let mut orig_map = SecondaryMap::with_default(None);
|
||||
for (i, lb) in lowered_order.iter().enumerate() {
|
||||
let i = i as BlockIndex;
|
||||
if let Some(b) = lb.orig_block() {
|
||||
orig_map[b] = Some(i);
|
||||
}
|
||||
}
|
||||
|
||||
let result = BlockLoweringOrder {
|
||||
lowered_order,
|
||||
lowered_succs,
|
||||
lowered_succ_indices,
|
||||
lowered_succ_ranges,
|
||||
orig_map,
|
||||
};
|
||||
debug!("BlockLoweringOrder: {:?}", result);
|
||||
result
|
||||
}
|
||||
|
||||
/// Get the lowered order of blocks.
|
||||
pub fn lowered_order(&self) -> &[LoweredBlock] {
|
||||
&self.lowered_order[..]
|
||||
}
|
||||
|
||||
/// Get the successors for a lowered block, by index in `lowered_order()`'s
|
||||
/// returned slice. Each successor is paired with the edge-instruction
|
||||
/// (branch) corresponding to this edge.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succs[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the successor indices for a lowered block.
|
||||
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succ_indices[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the lowered block index containing a CLIF block, if any. (May not be
|
||||
/// present if the original CLIF block was unreachable.)
|
||||
pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
|
||||
self.orig_map[bb]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
||||
use crate::isa::CallConv;
|
||||
|
||||
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
|
||||
assert!(n_blocks > 0);
|
||||
|
||||
let name = ExternalName::testcase("test0");
|
||||
let mut sig = Signature::new(CallConv::SystemV);
|
||||
sig.params.push(AbiParam::new(I32));
|
||||
let mut func = Function::with_name_signature(name, sig);
|
||||
let blocks = (0..n_blocks)
|
||||
.map(|i| {
|
||||
let bb = func.dfg.make_block();
|
||||
assert!(bb.as_u32() == i as u32);
|
||||
bb
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let arg0 = func.dfg.append_block_param(blocks[0], I32);
|
||||
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
|
||||
let mut edge = 0;
|
||||
for i in 0..n_blocks {
|
||||
pos.insert_block(blocks[i]);
|
||||
let mut succs = vec![];
|
||||
while edge < edges.len() && edges[edge].0 == i {
|
||||
succs.push(edges[edge].1);
|
||||
edge += 1;
|
||||
}
|
||||
if succs.len() == 0 {
|
||||
pos.ins().return_(&[arg0]);
|
||||
} else if succs.len() == 1 {
|
||||
pos.ins().jump(blocks[succs[0]], &[]);
|
||||
} else if succs.len() == 2 {
|
||||
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
|
||||
pos.ins().jump(blocks[succs[1]], &[]);
|
||||
} else {
|
||||
panic!("Too many successors");
|
||||
}
|
||||
}
|
||||
|
||||
func
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_diamond() {
|
||||
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 6);
|
||||
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
|
||||
assert!(order.lowered_order[2].orig_block().is_none());
|
||||
assert!(order.lowered_order[2].in_edge().is_none());
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[3].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[5].in_edge().is_none());
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_critedge() {
|
||||
// 0
|
||||
// / \
|
||||
// 1 2
|
||||
// / \ \
|
||||
// 3 4 |
|
||||
// |\ _|____|
|
||||
// | \/ |
|
||||
// | /\ |
|
||||
// 5 6
|
||||
//
|
||||
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
|
||||
//
|
||||
let func = build_test_func(
|
||||
7,
|
||||
&[
|
||||
(0, 1),
|
||||
(0, 2),
|
||||
(1, 3),
|
||||
(1, 4),
|
||||
(2, 5),
|
||||
(3, 5),
|
||||
(3, 6),
|
||||
(4, 6),
|
||||
],
|
||||
);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 11);
|
||||
println!("ordered = {:?}", order.lowered_order);
|
||||
|
||||
// block 0
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
// edge 0->1 + block 1
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
assert!(order.lowered_order[1].out_edge().is_none());
|
||||
|
||||
// edge 1->3 + block 3
|
||||
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
|
||||
assert!(order.lowered_order[2].out_edge().is_none());
|
||||
|
||||
// edge 3->5
|
||||
assert!(order.lowered_order[3].orig_block().is_none());
|
||||
assert!(order.lowered_order[3].in_edge().is_none());
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// edge 3->6
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// edge 1->4 + block 4
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
|
||||
// edge 4->6
|
||||
assert!(order.lowered_order[6].orig_block().is_none());
|
||||
assert!(order.lowered_order[6].in_edge().is_none());
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// block 6
|
||||
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
|
||||
assert!(order.lowered_order[7].in_edge().is_none());
|
||||
assert!(order.lowered_order[7].out_edge().is_none());
|
||||
|
||||
// edge 0->2 + block 2
|
||||
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[8].out_edge().is_none());
|
||||
|
||||
// edge 2->5
|
||||
assert!(order.lowered_order[9].orig_block().is_none());
|
||||
assert!(order.lowered_order[9].in_edge().is_none());
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// block 5
|
||||
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
|
||||
assert!(order.lowered_order[10].in_edge().is_none());
|
||||
assert!(order.lowered_order[10].out_edge().is_none());
|
||||
}
|
||||
/// Compute the final block order.
|
||||
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
|
||||
let mut rpo = BlockRPO::new(vcode);
|
||||
rpo.visit(vcode, vcode.entry());
|
||||
rpo.rpo()
|
||||
}
|
||||
|
|
Diff between files not shown because of its large size
|
@ -6,11 +6,11 @@ use crate::settings;
|
|||
use crate::timing;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
|
||||
use regalloc::{allocate_registers, RegAllocAlgorithm};
|
||||
|
||||
/// Compile the given function down to VCode with allocated registers, ready
|
||||
/// for binary emission.
|
||||
pub fn compile<B: LowerBackend + MachBackend>(
|
||||
pub fn compile<B: LowerBackend>(
|
||||
f: &Function,
|
||||
b: &B,
|
||||
abi: Box<dyn ABIBody<I = B::MInst>>,
|
||||
|
@ -18,46 +18,29 @@ pub fn compile<B: LowerBackend + MachBackend>(
|
|||
where
|
||||
B::MInst: ShowWithRRU,
|
||||
{
|
||||
// Compute lowered block order.
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
// Build the lowering context.
|
||||
let lower = Lower::new(f, abi, block_order)?;
|
||||
// Lower the IR.
|
||||
let mut vcode = lower.lower(b)?;
|
||||
// This lowers the CL IR.
|
||||
let mut vcode = Lower::new(f, abi)?.lower(b)?;
|
||||
|
||||
debug!(
|
||||
"vcode from lowering: \n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
);
|
||||
let universe = &B::MInst::reg_universe(vcode.flags());
|
||||
|
||||
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
|
||||
|
||||
// Perform register allocation.
|
||||
let (run_checker, algorithm) = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
|
||||
settings::Regalloc::BacktrackingChecked => {
|
||||
(true, Algorithm::Backtracking(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScan => {
|
||||
(false, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScanChecked => {
|
||||
(true, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
let algorithm = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => RegAllocAlgorithm::Backtracking,
|
||||
settings::Regalloc::BacktrackingChecked => RegAllocAlgorithm::BacktrackingChecked,
|
||||
settings::Regalloc::ExperimentalLinearScan => RegAllocAlgorithm::LinearScan,
|
||||
};
|
||||
|
||||
let result = {
|
||||
let _tt = timing::regalloc();
|
||||
allocate_registers_with_opts(
|
||||
&mut vcode,
|
||||
b.reg_universe(),
|
||||
Options {
|
||||
run_checker,
|
||||
algorithm,
|
||||
},
|
||||
allocate_registers(
|
||||
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
|
||||
)
|
||||
.map_err(|err| {
|
||||
debug!(
|
||||
"Register allocation error for vcode\n{}\nError: {:?}",
|
||||
vcode.show_rru(Some(b.reg_universe())),
|
||||
vcode.show_rru(Some(universe)),
|
||||
err
|
||||
);
|
||||
err
|
||||
|
@ -69,9 +52,14 @@ where
|
|||
// all at once. This also inserts prologues/epilogues.
|
||||
vcode.replace_insns_from_regalloc(result);
|
||||
|
||||
vcode.remove_redundant_branches();
|
||||
|
||||
// Do final passes over code to finalize branches.
|
||||
vcode.finalize_branches();
|
||||
|
||||
debug!(
|
||||
"vcode after regalloc: final version:\n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
vcode.show_rru(Some(universe))
|
||||
);
|
||||
|
||||
Ok(vcode)
|
||||
|
|
Diff between files not shown because of its large size
|
@ -109,7 +109,6 @@ use regalloc::RegUsageCollector;
|
|||
use regalloc::{
|
||||
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use std::string::String;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
|
@ -125,8 +124,8 @@ pub mod abi;
|
|||
pub use abi::*;
|
||||
pub mod pretty_print;
|
||||
pub use pretty_print::*;
|
||||
pub mod buffer;
|
||||
pub use buffer::*;
|
||||
pub mod sections;
|
||||
pub use sections::*;
|
||||
pub mod adapter;
|
||||
pub use adapter::*;
|
||||
|
||||
|
@ -138,7 +137,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Map virtual registers to physical registers using the given virt->phys
|
||||
/// maps corresponding to the program points prior to, and after, this instruction.
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
|
||||
fn map_regs(&mut self, maps: &RegUsageMapper);
|
||||
|
||||
/// If this is a simple move, return the (source, destination) tuple of registers.
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
|
||||
|
@ -153,9 +152,6 @@ pub trait MachInst: Clone + Debug {
|
|||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
/// Generate a constant into a reg.
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>;
|
||||
|
||||
/// Generate a zero-length no-op.
|
||||
fn gen_zero_len_nop() -> Self;
|
||||
|
||||
|
@ -170,7 +166,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Generate a jump to another target. Used during lowering of
|
||||
/// control flow.
|
||||
fn gen_jump(target: MachLabel) -> Self;
|
||||
fn gen_jump(target: BlockIndex) -> Self;
|
||||
|
||||
/// Generate a NOP. The `preferred_size` parameter allows the caller to
|
||||
/// request a NOP of that size, or as close to it as possible. The machine
|
||||
|
@ -179,6 +175,17 @@ pub trait MachInst: Clone + Debug {
|
|||
/// the instruction must have a nonzero size.
|
||||
fn gen_nop(preferred_size: usize) -> Self;
|
||||
|
||||
/// Rewrite block targets using the block-target map.
|
||||
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
|
||||
|
||||
/// Finalize branches once the block order (fallthrough) is known.
|
||||
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
|
||||
|
||||
/// Update instruction once block offsets are known. These offsets are
|
||||
/// relative to the beginning of the function. `targets` is indexed by
|
||||
/// BlockIndex.
|
||||
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
|
||||
|
||||
/// Get the register universe for this backend.
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse;
|
||||
|
||||
|
@ -187,54 +194,6 @@ pub trait MachInst: Clone + Debug {
|
|||
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
|
||||
offset
|
||||
}
|
||||
|
||||
/// What is the worst-case instruction size emitted by this instruction type?
|
||||
fn worst_case_size() -> CodeOffset;
|
||||
|
||||
/// A label-use kind: a type that describes the types of label references that
|
||||
/// can occur in an instruction.
|
||||
type LabelUse: MachInstLabelUse;
|
||||
}
|
||||
|
||||
/// A descriptor of a label reference (use) in an instruction set.
|
||||
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
|
||||
/// Required alignment for any veneer. Usually the required instruction
|
||||
/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
|
||||
const ALIGN: CodeOffset;
|
||||
|
||||
/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
|
||||
/// label-reference fixup at offset `x` is valid if the label resolves to `x
|
||||
/// + 1024`.
|
||||
fn max_pos_range(self) -> CodeOffset;
|
||||
/// What is the maximum PC-relative range (negative)? This is the absolute
|
||||
/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
|
||||
/// valid if the label resolves to `x - 1024`.
|
||||
fn max_neg_range(self) -> CodeOffset;
|
||||
/// What is the size of code-buffer slice this label-use needs to patch in
|
||||
/// the label's value?
|
||||
fn patch_size(self) -> CodeOffset;
|
||||
/// Perform a code-patch, given the offset into the buffer of this label use
|
||||
/// and the offset into the buffer of the label's definition.
|
||||
/// It is guaranteed that, given `delta = label_offset - offset`, we will
|
||||
/// have `delta >= -self.max_neg_range()` and `delta <=
|
||||
/// self.max_pos_range()`.
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
|
||||
/// Can the label-use be patched to a veneer that supports a longer range?
|
||||
/// Usually valid for jumps (a short-range jump can jump to a longer-range
|
||||
/// jump), but not for e.g. constant pool references, because the constant
|
||||
/// load would require different code (one more level of indirection).
|
||||
fn supports_veneer(self) -> bool;
|
||||
/// How many bytes are needed for a veneer?
|
||||
fn veneer_size(self) -> CodeOffset;
|
||||
/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
|
||||
/// bytes long at offset `veneer_offset` in the buffer. The original
|
||||
/// label-use will be patched to refer to this veneer's offset. A new
|
||||
/// (offset, LabelUse) is returned that allows the veneer to use the actual
|
||||
/// label. For veneers to work properly, it is expected that the new veneer
|
||||
/// has a larger range; on most platforms this probably means either a
|
||||
/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
|
||||
/// stage, a jump that supports a full 32-bit range, for example.
|
||||
fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
|
||||
}
|
||||
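A minimal sketch of the range check this interface enables (hypothetical reach values, not a real ISA): given a use at `use_offset` and a label at `label_offset`, the buffer can decide whether the reference is directly representable or whether a veneer is required.

/// Returns true if a label reference at `use_offset` can directly reach
/// `label_offset`, given the positive/negative reach of the label-use kind.
/// Hypothetical helper, mirroring the max_pos_range/max_neg_range contract above.
fn label_in_range(use_offset: u32, label_offset: u32, max_pos: u32, max_neg: u32) -> bool {
    if label_offset >= use_offset {
        label_offset - use_offset <= max_pos
    } else {
        use_offset - label_offset <= max_neg
    }
}

fn main() {
    // A hypothetical branch form that reaches 1 MiB in either direction.
    let (max_pos, max_neg) = (1 << 20, 1 << 20);
    assert!(label_in_range(0x1000, 0x2000, max_pos, max_neg));      // forward, in range
    assert!(!label_in_range(0x1000, 0x20_0000, max_pos, max_neg));  // too far: emit a veneer
}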
|
||||
/// Describes a block terminator (not call) in the vcode, when its branches
|
||||
|
@ -246,26 +205,24 @@ pub enum MachTerminator<'a> {
|
|||
/// A return instruction.
|
||||
Ret,
|
||||
/// An unconditional branch to another block.
|
||||
Uncond(MachLabel),
|
||||
Uncond(BlockIndex),
|
||||
/// A conditional branch to one of two other blocks.
|
||||
Cond(MachLabel, MachLabel),
|
||||
Cond(BlockIndex, BlockIndex),
|
||||
/// An indirect branch with known possible targets.
|
||||
Indirect(&'a [MachLabel]),
|
||||
Indirect(&'a [BlockIndex]),
|
||||
}
|
||||
|
||||
/// A trait describing the ability to encode a MachInst into binary machine code.
|
||||
pub trait MachInstEmit: MachInst {
|
||||
/// Persistent state carried across `emit` invocations.
|
||||
type State: Default + Clone + Debug;
|
||||
pub trait MachInstEmit<O: MachSectionOutput> {
|
||||
/// Emit the instruction.
|
||||
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
|
||||
fn emit(&self, code: &mut O, flags: &Flags);
|
||||
}
|
||||
|
||||
/// The result of a `MachBackend::compile_function()` call. Contains machine
|
||||
/// code (as bytes) and a disassembly, if requested.
|
||||
pub struct MachCompileResult {
|
||||
/// Machine code.
|
||||
pub buffer: MachBufferFinalized,
|
||||
pub sections: MachSections,
|
||||
/// Size of stack frame, in bytes.
|
||||
pub frame_size: u32,
|
||||
/// Disassembly, if requested.
|
||||
|
@ -275,7 +232,7 @@ pub struct MachCompileResult {
|
|||
impl MachCompileResult {
|
||||
/// Get a `CodeInfo` describing section sizes from this compilation result.
|
||||
pub fn code_info(&self) -> CodeInfo {
|
||||
let code_size = self.buffer.total_size();
|
||||
let code_size = self.sections.total_size();
|
||||
CodeInfo {
|
||||
code_size,
|
||||
jumptables_size: 0,
|
||||
|
@ -305,13 +262,17 @@ pub trait MachBackend {
|
|||
fn name(&self) -> &'static str;
|
||||
|
||||
/// Return the register universe for this backend.
|
||||
fn reg_universe(&self) -> &RealRegUniverse;
|
||||
fn reg_universe(&self) -> RealRegUniverse;
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
/// Condition that will be true when an IaddIfcout overflows.
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC;
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
/// Condition that will be true when an IsubIfcout overflows.
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC;
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,460 @@
|
|||
//! In-memory representation of compiled machine code, in multiple sections
|
||||
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
|
||||
//! simultaneously, so we buffer the result in memory and hand off to the
|
||||
//! caller at the end of compilation.
|
||||
|
||||
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// A collection of sections with defined start-offsets.
|
||||
pub struct MachSections {
|
||||
/// Sections, in offset order.
|
||||
pub sections: Vec<MachSection>,
|
||||
}
|
||||
|
||||
impl MachSections {
|
||||
/// New, empty set of sections.
|
||||
pub fn new() -> MachSections {
|
||||
MachSections { sections: vec![] }
|
||||
}
|
||||
|
||||
/// Add a section with a known offset and size. Returns the index.
|
||||
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
|
||||
let idx = self.sections.len();
|
||||
self.sections.push(MachSection::new(start, length));
|
||||
idx
|
||||
}
|
||||
|
||||
/// Mutably borrow the given section by index.
|
||||
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
|
||||
&mut self.sections[idx]
|
||||
}
|
||||
|
||||
/// Get mutable borrows of two sections simultaneously. Used during
|
||||
/// instruction emission to provide references to the .text and .rodata
|
||||
/// (constant pool) sections.
|
||||
pub fn two_sections<'a>(
|
||||
&'a mut self,
|
||||
idx1: usize,
|
||||
idx2: usize,
|
||||
) -> (&'a mut MachSection, &'a mut MachSection) {
|
||||
assert!(idx1 < idx2);
|
||||
assert!(idx1 < self.sections.len());
|
||||
assert!(idx2 < self.sections.len());
|
||||
let (first, rest) = self.sections.split_at_mut(idx2);
|
||||
(&mut first[idx1], &mut rest[0])
|
||||
}
|
||||
|
||||
/// Emit this set of sections to a set of sinks for the code,
|
||||
/// relocations, traps, and stackmap.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
// N.B.: we emit every section into the .text section as far as
|
||||
// the `CodeSink` is concerned; we do not bother to segregate
|
||||
// the contents into the actual program text, the jumptable and the
|
||||
// rodata (constant pool). This allows us to generate code assuming
|
||||
// that these will not be relocated relative to each other, and avoids
|
||||
// having to designate each section as belonging in one of the three
|
||||
// fixed categories defined by `CodeSink`. If this becomes a problem
|
||||
// later (e.g. because of memory permissions or similar), we can
|
||||
// add this designation and segregate the output; take care, however,
|
||||
// to add the appropriate relocations in this case.
|
||||
|
||||
for section in &self.sections {
|
||||
if section.data.len() > 0 {
|
||||
while sink.offset() < section.start_offset {
|
||||
sink.put1(0);
|
||||
}
|
||||
section.emit(sink);
|
||||
}
|
||||
}
|
||||
sink.begin_jumptables();
|
||||
sink.begin_rodata();
|
||||
sink.end_codegen();
|
||||
}
|
||||
|
||||
/// Get a list of source location mapping tuples in sorted-by-start-offset order.
|
||||
pub fn get_srclocs_sorted<'a>(&'a self) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs::new(&self.sections)
|
||||
}
|
||||
|
||||
/// Get the total required size for these sections.
|
||||
pub fn total_size(&self) -> CodeOffset {
|
||||
if self.sections.len() == 0 {
|
||||
0
|
||||
} else {
|
||||
// Find the last non-empty section.
|
||||
self.sections
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| s.data.len() > 0)
|
||||
.map(|s| s.cur_offset_from_start())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the srclocs in each section.
|
||||
/// Returns MachSrcLocs in an order sorted by start location.
|
||||
pub struct MachSectionsSrcLocs<'a> {
|
||||
sections: &'a [MachSection],
|
||||
cur_section: usize,
|
||||
cur_srcloc: usize,
|
||||
// For validation:
|
||||
last_offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl<'a> MachSectionsSrcLocs<'a> {
|
||||
fn new(sections: &'a [MachSection]) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs {
|
||||
sections,
|
||||
cur_section: 0,
|
||||
cur_srcloc: 0,
|
||||
last_offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MachSectionsSrcLocs<'a> {
|
||||
type Item = &'a MachSrcLoc;
|
||||
|
||||
fn next(&mut self) -> Option<&'a MachSrcLoc> {
|
||||
// We simply iterate through sections and srcloc records in order. This produces a
|
||||
// sorted order naturally because sections are in starting-offset-order, and srclocs
|
||||
// are produced as a section is emitted into, so are in order as well.
|
||||
|
||||
// If we're out of sections, we're done.
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Otherwise, make sure we have a srcloc in the current section left to return, and
|
||||
// advance to the next section if not. Done if we run out of sections.
|
||||
while self.cur_srcloc >= self.sections[self.cur_section].srclocs.len() {
|
||||
self.cur_srcloc = 0;
|
||||
self.cur_section += 1;
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let loc = &self.sections[self.cur_section].srclocs[self.cur_srcloc];
|
||||
self.cur_srcloc += 1;
|
||||
debug_assert!(loc.start >= self.last_offset);
|
||||
self.last_offset = loc.start;
|
||||
Some(loc)
|
||||
}
|
||||
}
|
||||
|
||||
/// An abstraction over MachSection and MachSectionSize: some
|
||||
/// receiver of section data.
|
||||
pub trait MachSectionOutput {
|
||||
/// Get the current offset from the start of all sections.
|
||||
fn cur_offset_from_start(&self) -> CodeOffset;
|
||||
|
||||
/// Get the start offset of this section.
|
||||
fn start_offset(&self) -> CodeOffset;
|
||||
|
||||
/// Add 1 byte to the section.
|
||||
fn put1(&mut self, _: u8);
|
||||
|
||||
/// Add 2 bytes to the section.
|
||||
fn put2(&mut self, value: u16) {
|
||||
let [b0, b1] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
}
|
||||
|
||||
/// Add 4 bytes to the section.
|
||||
fn put4(&mut self, value: u32) {
|
||||
let [b0, b1, b2, b3] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
}
|
||||
|
||||
/// Add 8 bytes to the section.
|
||||
fn put8(&mut self, value: u64) {
|
||||
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
self.put1(b4);
|
||||
self.put1(b5);
|
||||
self.put1(b6);
|
||||
self.put1(b7);
|
||||
}
|
||||
|
||||
/// Add a slice of bytes to the section.
|
||||
fn put_data(&mut self, data: &[u8]);
|
||||
|
||||
/// Add a relocation at the current offset.
|
||||
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
|
||||
|
||||
/// Add a trap record at the current offset.
|
||||
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
|
||||
|
||||
/// Add a call return address record at the current offset.
|
||||
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
|
||||
|
||||
/// Start the output for the given source-location at the current offset.
|
||||
fn start_srcloc(&mut self, loc: SourceLoc);
|
||||
|
||||
/// End the output for the previously-given source-location at the current offset.
|
||||
fn end_srcloc(&mut self);
|
||||
|
||||
/// Align up to the given alignment.
|
||||
fn align_to(&mut self, align_to: CodeOffset) {
|
||||
assert!(align_to.is_power_of_two());
|
||||
while self.cur_offset_from_start() & (align_to - 1) != 0 {
|
||||
self.put1(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
|
||||
/// Multiple sections may be created with known start offsets in advance; the
|
||||
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
|
||||
/// once, after computing the length of the code, so that constant references
|
||||
/// can use known offsets as instructions are emitted.
|
||||
pub struct MachSection {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The limit of this section, defined by the start of the next section.
|
||||
pub length_limit: CodeOffset,
|
||||
/// The section contents, as raw bytes.
|
||||
pub data: Vec<u8>,
|
||||
/// Any relocations referring to this section.
|
||||
pub relocs: Vec<MachReloc>,
|
||||
/// Any trap records referring to this section.
|
||||
pub traps: Vec<MachTrap>,
|
||||
/// Any call site records referring to this section.
|
||||
pub call_sites: Vec<MachCallSite>,
|
||||
/// Any source location mappings referring to this section.
|
||||
pub srclocs: Vec<MachSrcLoc>,
|
||||
/// The current source location in progress (after `start_srcloc()` and before `end_srcloc()`).
|
||||
/// This is a (start_offset, src_loc) tuple.
|
||||
pub cur_srcloc: Option<(CodeOffset, SourceLoc)>,
|
||||
}
|
||||
|
||||
impl MachSection {
|
||||
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
|
||||
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
|
||||
MachSection {
|
||||
start_offset,
|
||||
length_limit,
|
||||
data: vec![],
|
||||
relocs: vec![],
|
||||
traps: vec![],
|
||||
call_sites: vec![],
|
||||
srclocs: vec![],
|
||||
cur_srcloc: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit this section to the CodeSink and other associated sinks. The
|
||||
/// current offset of the CodeSink must match the starting offset of this
|
||||
/// section.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
assert!(sink.offset() == self.start_offset);
|
||||
|
||||
let mut next_reloc = 0;
|
||||
let mut next_trap = 0;
|
||||
let mut next_call_site = 0;
|
||||
for (idx, byte) in self.data.iter().enumerate() {
|
||||
if next_reloc < self.relocs.len() {
|
||||
let reloc = &self.relocs[next_reloc];
|
||||
if reloc.offset == idx as CodeOffset {
|
||||
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
|
||||
next_reloc += 1;
|
||||
}
|
||||
}
|
||||
if next_trap < self.traps.len() {
|
||||
let trap = &self.traps[next_trap];
|
||||
if trap.offset == idx as CodeOffset {
|
||||
sink.trap(trap.code, trap.srcloc);
|
||||
next_trap += 1;
|
||||
}
|
||||
}
|
||||
if next_call_site < self.call_sites.len() {
|
||||
let call_site = &self.call_sites[next_call_site];
|
||||
if call_site.ret_addr == idx as CodeOffset {
|
||||
sink.add_call_site(call_site.opcode, call_site.srcloc);
|
||||
next_call_site += 1;
|
||||
}
|
||||
}
|
||||
sink.put1(*byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSection {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
self.start_offset + self.data.len() as CodeOffset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, value: u8) {
|
||||
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
|
||||
self.data.push(value);
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
|
||||
self.data.extend_from_slice(data);
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
|
||||
let name = name.clone();
|
||||
self.relocs.push(MachReloc {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
kind,
|
||||
name,
|
||||
addend,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
|
||||
self.traps.push(MachTrap {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
code,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
|
||||
self.call_sites.push(MachCallSite {
|
||||
ret_addr: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
opcode,
|
||||
});
|
||||
}
|
||||
|
||||
fn start_srcloc(&mut self, loc: SourceLoc) {
|
||||
self.cur_srcloc = Some((self.cur_offset_from_start(), loc));
|
||||
}
|
||||
|
||||
fn end_srcloc(&mut self) {
|
||||
let (start, loc) = self
|
||||
.cur_srcloc
|
||||
.take()
|
||||
.expect("end_srcloc() called without start_srcloc()");
|
||||
let end = self.cur_offset_from_start();
|
||||
// Skip zero-length extends.
|
||||
debug_assert!(end >= start);
|
||||
if end > start {
|
||||
self.srclocs.push(MachSrcLoc { start, end, loc });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A MachSectionOutput implementation that records only size.
|
||||
pub struct MachSectionSize {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The current offset of this section.
|
||||
pub offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl MachSectionSize {
|
||||
/// Create a new size-counting dummy section.
|
||||
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
|
||||
MachSectionSize {
|
||||
start_offset,
|
||||
offset: start_offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the size this section would take if emitted with a real sink.
|
||||
pub fn size(&self) -> CodeOffset {
|
||||
self.offset - self.start_offset
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSectionSize {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
// All size-counting sections conceptually start at offset 0; this doesn't
|
||||
// matter when counting code size.
|
||||
self.offset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, _: u8) {
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
self.offset += data.len() as CodeOffset;
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
|
||||
|
||||
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
|
||||
|
||||
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
|
||||
|
||||
fn start_srcloc(&mut self, _: SourceLoc) {}
|
||||
|
||||
fn end_srcloc(&mut self) {}
|
||||
}
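// A hypothetical usage sketch (not part of the original file) showing how the
// size-only sink composes with the `MachSectionOutput` default methods: `put4`
// expands into four little-endian `put1` calls, and `align_to` pads with zero
// bytes up to the requested power-of-two boundary. The name `_size_sink_sketch`
// is an assumption made for this example only.
fn _size_sink_sketch() {
    let mut size = MachSectionSize::new(0);
    size.put4(0xDEAD_BEEF);
    assert_eq!(size.size(), 4); // four bytes counted via `put1`
    size.align_to(8);
    assert_eq!(size.size(), 8); // padded out to the next 8-byte boundary
}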
|
||||
|
||||
/// A relocation resulting from a compilation.
|
||||
pub struct MachReloc {
|
||||
/// The offset at which the relocation applies, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The kind of relocation.
|
||||
pub kind: Reloc,
|
||||
/// The external symbol / name to which this relocation refers.
|
||||
pub name: ExternalName,
|
||||
/// The addend to add to the symbol value.
|
||||
pub addend: i64,
|
||||
}
|
||||
|
||||
/// A trap record resulting from a compilation.
|
||||
pub struct MachTrap {
|
||||
/// The offset at which the trap instruction occurs, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The trap code.
|
||||
pub code: TrapCode,
|
||||
}
|
||||
|
||||
/// A call site record resulting from a compilation.
|
||||
pub struct MachCallSite {
|
||||
/// The offset of the call's return address, *relative to the containing section*.
|
||||
pub ret_addr: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The call's opcode.
|
||||
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// A source-location mapping resulting from a compilation.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MachSrcLoc {
|
||||
/// The start of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub start: CodeOffset,
|
||||
/// The end of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub end: CodeOffset,
|
||||
/// The source location.
|
||||
pub loc: SourceLoc,
|
||||
}
|
|
@ -17,7 +17,9 @@
|
|||
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
||||
//! backend pipeline.
|
||||
|
||||
use crate::ir::{self, SourceLoc};
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir;
|
||||
use crate::ir::SourceLoc;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
|
@ -28,7 +30,9 @@ use regalloc::{
|
|||
};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{borrow::Cow, vec::Vec};
|
||||
use alloc::vec::Vec;
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::string::String;
|
||||
|
@ -40,8 +44,8 @@ pub type BlockIndex = u32;
|
|||
|
||||
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
|
||||
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
|
||||
pub trait VCodeInst: MachInst + MachInstEmit {}
|
||||
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
|
||||
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
|
||||
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
|
||||
|
||||
/// A function in "VCode" (virtualized-register code) form, after lowering.
|
||||
/// This is essentially a standard CFG of basic blocks, where each basic block
|
||||
|
@ -75,10 +79,25 @@ pub struct VCode<I: VCodeInst> {
|
|||
/// Block successor lists, concatenated into one Vec. The `block_succ_range`
|
||||
/// list of tuples above gives (start, end) ranges within this list that
|
||||
/// correspond to each basic block's successors.
|
||||
block_succs: Vec<BlockIx>,
|
||||
block_succs: Vec<BlockIndex>,
|
||||
|
||||
/// Block-order information.
|
||||
block_order: BlockLoweringOrder,
|
||||
/// Block indices by IR block.
|
||||
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
|
||||
|
||||
/// IR block for each VCode Block. The length of this Vec will likely be
|
||||
/// less than the total number of Blocks, because new Blocks (for edge
|
||||
/// splits, for example) are appended during lowering.
|
||||
bb_by_block: Vec<ir::Block>,
|
||||
|
||||
/// Order of block IDs in final generated code.
|
||||
final_block_order: Vec<BlockIndex>,
|
||||
|
||||
/// Final block offsets. Computed during branch finalization and used
|
||||
/// during emission.
|
||||
final_block_offsets: Vec<CodeOffset>,
|
||||
|
||||
/// Size of code, accounting for block layout / alignment.
|
||||
code_size: CodeOffset,
|
||||
|
||||
/// ABI object.
|
||||
abi: Box<dyn ABIBody<I = I>>,
|
||||
|
@ -102,8 +121,12 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
/// In-progress VCode.
|
||||
vcode: VCode<I>,
|
||||
|
||||
/// Index of the last block-start in the vcode.
|
||||
block_start: InsnIndex,
|
||||
/// Current basic block instructions, in reverse order (because blocks are
|
||||
/// built bottom-to-top).
|
||||
bb_insns: SmallVec<[(I, SourceLoc); 32]>,
|
||||
|
||||
/// Current IR-inst instructions, in forward order.
|
||||
ir_inst_insns: SmallVec<[(I, SourceLoc); 4]>,
|
||||
|
||||
/// Start of succs for the current block in the concatenated succs list.
|
||||
succ_start: usize,
|
||||
|
@ -114,11 +137,12 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
|
||||
impl<I: VCodeInst> VCodeBuilder<I> {
|
||||
/// Create a new VCodeBuilder.
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi, block_order);
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi);
|
||||
VCodeBuilder {
|
||||
vcode,
|
||||
block_start: 0,
|
||||
bb_insns: SmallVec::new(),
|
||||
ir_inst_insns: SmallVec::new(),
|
||||
succ_start: 0,
|
||||
cur_srcloc: SourceLoc::default(),
|
||||
}
|
||||
|
@ -129,11 +153,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
&mut *self.vcode.abi
|
||||
}
|
||||
|
||||
/// Access to the BlockLoweringOrder object.
|
||||
pub fn block_order(&self) -> &BlockLoweringOrder {
|
||||
&self.vcode.block_order
|
||||
}
|
||||
|
||||
/// Set the type of a VReg.
|
||||
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
|
||||
while self.vcode.vreg_types.len() <= vreg.get_index() {
|
||||
|
@ -142,17 +161,53 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
self.vcode.vreg_types[vreg.get_index()] = ty;
|
||||
}
|
||||
|
||||
/// Return the underlying bb-to-BlockIndex map.
|
||||
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
|
||||
&self.vcode.block_by_bb
|
||||
}
|
||||
|
||||
/// Initialize the bb-to-BlockIndex map. Returns the first free
|
||||
/// BlockIndex.
|
||||
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
|
||||
let mut bindex: BlockIndex = 0;
|
||||
for bb in blocks.iter() {
|
||||
self.vcode.block_by_bb[*bb] = bindex;
|
||||
self.vcode.bb_by_block.push(*bb);
|
||||
bindex += 1;
|
||||
}
|
||||
bindex
|
||||
}
|
||||
|
||||
/// Get the BlockIndex for an IR block.
|
||||
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
|
||||
self.vcode.block_by_bb[bb]
|
||||
}
|
||||
|
||||
/// Set the current block as the entry block.
|
||||
pub fn set_entry(&mut self, block: BlockIndex) {
|
||||
self.vcode.entry = block;
|
||||
}
|
||||
|
||||
/// End the current IR instruction. Must be called after pushing any
|
||||
/// instructions and prior to ending the basic block.
|
||||
pub fn end_ir_inst(&mut self) {
|
||||
while let Some(pair) = self.ir_inst_insns.pop() {
|
||||
self.bb_insns.push(pair);
|
||||
}
|
||||
}
|
||||
|
||||
/// End the current basic block. Must be called after emitting vcode insts
|
||||
/// for IR insts and prior to ending the function (building the VCode).
|
||||
pub fn end_bb(&mut self) {
|
||||
let start_idx = self.block_start;
|
||||
pub fn end_bb(&mut self) -> BlockIndex {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
let block_num = self.vcode.block_ranges.len() as BlockIndex;
|
||||
// Push the instructions.
|
||||
let start_idx = self.vcode.insts.len() as InsnIndex;
|
||||
while let Some((i, loc)) = self.bb_insns.pop() {
|
||||
self.vcode.insts.push(i);
|
||||
self.vcode.srclocs.push(loc);
|
||||
}
|
||||
let end_idx = self.vcode.insts.len() as InsnIndex;
|
||||
self.block_start = end_idx;
|
||||
// Add the instruction index range to the list of blocks.
|
||||
self.vcode.block_ranges.push((start_idx, end_idx));
|
||||
// End the successors list.
|
||||
|
@ -161,6 +216,8 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
.block_succ_range
|
||||
.push((self.succ_start, succ_end));
|
||||
self.succ_start = succ_end;
|
||||
|
||||
block_num
|
||||
}
|
||||
|
||||
/// Push an instruction for the current BB and current IR inst within the BB.
|
||||
|
@ -168,27 +225,19 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
match insn.is_term() {
|
||||
MachTerminator::None | MachTerminator::Ret => {}
|
||||
MachTerminator::Uncond(target) => {
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
self.vcode.block_succs.push(target);
|
||||
}
|
||||
MachTerminator::Cond(true_branch, false_branch) => {
|
||||
self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
|
||||
self.vcode
|
||||
.block_succs
|
||||
.push(BlockIx::new(false_branch.get()));
|
||||
self.vcode.block_succs.push(true_branch);
|
||||
self.vcode.block_succs.push(false_branch);
|
||||
}
|
||||
MachTerminator::Indirect(targets) => {
|
||||
for target in targets {
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
self.vcode.block_succs.push(*target);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.vcode.insts.push(insn);
|
||||
self.vcode.srclocs.push(self.cur_srcloc);
|
||||
}
|
||||
|
||||
/// Get the current source location.
|
||||
pub fn get_srcloc(&self) -> SourceLoc {
|
||||
self.cur_srcloc
|
||||
self.ir_inst_insns.push((insn, self.cur_srcloc));
|
||||
}
|
||||
|
||||
/// Set the current source location.
|
||||
|
@ -198,6 +247,8 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
|
||||
/// Build the final VCode.
|
||||
pub fn build(self) -> VCode<I> {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
assert!(self.bb_insns.is_empty());
|
||||
self.vcode
|
||||
}
|
||||
}
|
||||
|
@ -219,9 +270,35 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
|
||||
let range = vcode.block_insns(BlockIx::new(block));
|
||||
|
||||
debug!(
|
||||
"is_trivial_jump_block: block {} has len {}",
|
||||
block,
|
||||
range.len()
|
||||
);
|
||||
|
||||
if range.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
let insn = range.first();
|
||||
|
||||
debug!(
|
||||
" -> only insn is: {:?} with terminator {:?}",
|
||||
vcode.get_insn(insn),
|
||||
vcode.get_insn(insn).is_term()
|
||||
);
|
||||
|
||||
match vcode.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(target) => Some(target),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: VCodeInst> VCode<I> {
|
||||
/// New empty VCode.
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
|
||||
VCode {
|
||||
liveins: abi.liveins(),
|
||||
liveouts: abi.liveouts(),
|
||||
|
@ -232,7 +309,11 @@ impl<I: VCodeInst> VCode<I> {
|
|||
block_ranges: vec![],
|
||||
block_succ_range: vec![],
|
||||
block_succs: vec![],
|
||||
block_order,
|
||||
block_by_bb: SecondaryMap::with_default(0),
|
||||
bb_by_block: vec![],
|
||||
final_block_order: vec![],
|
||||
final_block_offsets: vec![],
|
||||
code_size: 0,
|
||||
abi,
|
||||
}
|
||||
}
|
||||
|
@ -264,7 +345,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
/// Get the successors for a block.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
|
||||
let (start, end) = self.block_succ_range[block as usize];
|
||||
&self.block_succs[start..end]
|
||||
}
|
||||
|
@ -273,6 +354,8 @@ impl<I: VCodeInst> VCode<I> {
|
|||
/// instructions including spliced fill/reload/move instructions, and replace
|
||||
/// the VCode with them.
|
||||
pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
|
||||
self.final_block_order = compute_final_block_order(self);
|
||||
|
||||
// Record the spillslot count and clobbered registers for the ABI/stack
|
||||
// setup code.
|
||||
self.abi.set_num_spillslots(result.num_spill_slots as usize);
|
||||
|
@ -287,12 +370,11 @@ impl<I: VCodeInst> VCode<I> {
|
|||
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
|
||||
let mut final_srclocs = vec![];
|
||||
|
||||
for block in 0..self.num_blocks() {
|
||||
let block = block as BlockIndex;
|
||||
let (start, end) = block_ranges[block as usize];
|
||||
for block in &self.final_block_order {
|
||||
let (start, end) = block_ranges[*block as usize];
|
||||
let final_start = final_insns.len() as InsnIndex;
|
||||
|
||||
if block == self.entry {
|
||||
if *block == self.entry {
|
||||
// Start with the prologue.
|
||||
let prologue = self.abi.gen_prologue();
|
||||
let len = prologue.len();
|
||||
|
@ -334,7 +416,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
let final_end = final_insns.len() as InsnIndex;
|
||||
final_block_ranges[block as usize] = (final_start, final_end);
|
||||
final_block_ranges[*block as usize] = (final_start, final_end);
|
||||
}
|
||||
|
||||
debug_assert!(final_insns.len() == final_srclocs.len());
|
||||
|
@ -344,68 +426,174 @@ impl<I: VCodeInst> VCode<I> {
|
|||
self.block_ranges = final_block_ranges;
|
||||
}
|
||||
|
||||
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
|
||||
/// reloc/trap/etc. records ready for use.
|
||||
pub fn emit(&self) -> MachBuffer<I>
|
||||
where
|
||||
I: MachInstEmit,
|
||||
{
|
||||
let mut buffer = MachBuffer::new();
|
||||
let mut state = Default::default();
|
||||
/// Removes redundant branches, rewriting targets to point directly to the
|
||||
/// ultimate block at the end of a chain of trivial one-target jumps.
|
||||
pub fn remove_redundant_branches(&mut self) {
|
||||
// For each block, compute the actual target block, looking through up to one
|
||||
// block with single-target jumps (this will remove empty edge blocks inserted
|
||||
// by phi-lowering).
|
||||
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
|
||||
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
|
||||
.collect();
|
||||
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
|
||||
|
||||
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
|
||||
debug!(
|
||||
"remove_redundant_branches: block_rewrites = {:?}",
|
||||
block_rewrites
|
||||
);
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = None;
|
||||
for block in 0..self.num_blocks() {
|
||||
let block = block as BlockIndex;
|
||||
let new_offset = I::align_basic_block(buffer.cur_offset());
|
||||
while new_offset > buffer.cur_offset() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
|
||||
nop.emit(&mut buffer, flags, &mut Default::default());
|
||||
}
|
||||
assert_eq!(buffer.cur_offset(), new_offset);
|
||||
refcounts[self.entry as usize] = 1;
|
||||
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
buffer.bind_label(MachLabel::from_block(block));
|
||||
for iix in start..end {
|
||||
let srcloc = self.srclocs[iix as usize];
|
||||
if cur_srcloc != Some(srcloc) {
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
for block in 0..self.num_blocks() as u32 {
|
||||
for insn in self.block_insns(BlockIx::new(block)) {
|
||||
self.get_insn_mut(insn)
|
||||
.with_block_rewrites(&block_rewrites[..]);
|
||||
match self.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(bix) => {
|
||||
refcounts[bix as usize] += 1;
|
||||
}
|
||||
buffer.start_srcloc(srcloc);
|
||||
cur_srcloc = Some(srcloc);
|
||||
}
|
||||
|
||||
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
|
||||
}
|
||||
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
cur_srcloc = None;
|
||||
}
|
||||
|
||||
// Do we need an island? Get the worst-case size of the next BB and see if, having
|
||||
// emitted that many bytes, we will be beyond the deadline.
|
||||
if block < (self.num_blocks() - 1) as BlockIndex {
|
||||
let next_block = block + 1;
|
||||
let next_block_range = self.block_ranges[next_block as usize];
|
||||
let next_block_size = next_block_range.1 - next_block_range.0;
|
||||
let worst_case_next_bb = I::worst_case_size() * next_block_size;
|
||||
if buffer.island_needed(worst_case_next_bb) {
|
||||
buffer.emit_island();
|
||||
MachTerminator::Cond(bix1, bix2) => {
|
||||
refcounts[bix1 as usize] += 1;
|
||||
refcounts[bix2 as usize] += 1;
|
||||
}
|
||||
MachTerminator::Indirect(blocks) => {
|
||||
for block in blocks {
|
||||
refcounts[*block as usize] += 1;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buffer
|
||||
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
|
||||
|
||||
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
|
||||
self.final_block_order = block_order
|
||||
.into_iter()
|
||||
.filter(|b| !deleted[*b as usize])
|
||||
.collect();
|
||||
|
||||
// Rewrite successor information based on the block-rewrite map.
|
||||
for succ in &mut self.block_succs {
|
||||
let new_succ = block_rewrites[*succ as usize];
|
||||
*succ = new_succ;
|
||||
}
|
||||
}
|
||||
|
||||
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
|
||||
/// depending on fallthrough; and (ii) use concrete offsets.
|
||||
pub fn finalize_branches(&mut self)
|
||||
where
|
||||
I: MachInstEmit<MachSectionSize>,
|
||||
{
|
||||
// Compute fallthrough block, indexed by block.
|
||||
let num_final_blocks = self.final_block_order.len();
|
||||
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
|
||||
for i in 0..(num_final_blocks - 1) {
|
||||
let from = self.final_block_order[i];
|
||||
let to = self.final_block_order[i + 1];
|
||||
block_fallthrough[from as usize] = Some(to);
|
||||
}
|
||||
|
||||
// Pass over VCode instructions and finalize two-way branches into
|
||||
// one-way branches with fallthrough.
|
||||
for block in 0..self.num_blocks() {
|
||||
let next_block = block_fallthrough[block];
|
||||
let (start, end) = self.block_ranges[block];
|
||||
|
||||
for iix in start..end {
|
||||
let insn = &mut self.insts[iix as usize];
|
||||
insn.with_fallthrough_block(next_block);
|
||||
}
|
||||
}
|
||||
|
||||
let flags = self.abi.flags();
|
||||
|
||||
// Compute block offsets.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
let mut block_offsets = vec![0; self.num_blocks()];
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
block_offsets[block as usize] = code_section.offset;
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
|
||||
// We now have the section layout.
|
||||
self.final_block_offsets = block_offsets;
|
||||
self.code_size = code_section.size();
|
||||
|
||||
// Update branches with known block offsets. This looks like the
|
||||
// traversal above, but (i) does not update block_offsets, rather uses
|
||||
// it (so forward references are now possible), and (ii) mutates the
|
||||
// instructions.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize]
|
||||
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the instructions to a list of sections.
|
||||
pub fn emit(&self) -> MachSections
|
||||
where
|
||||
I: MachInstEmit<MachSection>,
|
||||
{
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, self.code_size);
|
||||
let code_section = sections.get_section(code_idx);
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = SourceLoc::default();
|
||||
for &block in &self.final_block_order {
|
||||
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
|
||||
while new_offset > code_section.cur_offset_from_start() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
|
||||
nop.emit(code_section, flags);
|
||||
}
|
||||
assert_eq!(code_section.cur_offset_from_start(), new_offset);
|
||||
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
let srcloc = self.srclocs[iix as usize];
|
||||
if srcloc != cur_srcloc {
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
}
|
||||
if !srcloc.is_default() {
|
||||
code_section.start_srcloc(srcloc);
|
||||
}
|
||||
cur_srcloc = srcloc;
|
||||
}
|
||||
|
||||
self.insts[iix as usize].emit(code_section, flags);
|
||||
}
|
||||
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
cur_srcloc = SourceLoc::default();
|
||||
}
|
||||
}
|
||||
|
||||
sections
|
||||
}
|
||||
|
||||
/// Get the IR block for a BlockIndex, if one exists.
|
||||
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
|
||||
self.block_order.lowered_order()[block as usize].orig_block()
|
||||
if (block as usize) < self.bb_by_block.len() {
|
||||
Some(self.bb_by_block[block as usize])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -441,9 +629,13 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
Range::new(InstIx::new(start), (end - start) as usize)
|
||||
}
|
||||
|
||||
fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
|
||||
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
|
||||
let (start, end) = self.block_succ_range[block.get() as usize];
|
||||
Cow::Borrowed(&self.block_succs[start..end])
|
||||
self.block_succs[start..end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(BlockIx::new)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn is_ret(&self, insn: InstIx) -> bool {
|
||||
|
@ -457,7 +649,7 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
insn.get_regs(collector)
|
||||
}
|
||||
|
||||
fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
|
||||
fn map_regs(insn: &mut I, mapper: &RegUsageMapper) {
|
||||
insn.map_regs(mapper);
|
||||
}
|
||||
|
||||
|
@ -510,11 +702,12 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
|
|||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
writeln!(f, "VCode_Debug {{")?;
|
||||
writeln!(f, " Entry block: {}", self.entry)?;
|
||||
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
|
||||
|
||||
for block in 0..self.num_blocks() {
|
||||
writeln!(f, "Block {}:", block,)?;
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
writeln!(f, " (successor: Block {})", succ.get())?;
|
||||
writeln!(f, " (successor: Block {})", succ)?;
|
||||
}
|
||||
let (start, end) = self.block_ranges[block];
|
||||
writeln!(f, " (instruction range: {} .. {})", start, end)?;
|
||||
|
@ -533,21 +726,52 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
|
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
use std::fmt::Write;
|
||||
|
||||
// Calculate an order in which to display the blocks. This is the same
|
||||
// as final_block_order, but also includes blocks which are in the
|
||||
// representation but not in final_block_order.
|
||||
let mut display_order = Vec::<usize>::new();
|
||||
// First display blocks in `final_block_order`
|
||||
for bix in &self.final_block_order {
|
||||
assert!((*bix as usize) < self.num_blocks());
|
||||
display_order.push(*bix as usize);
|
||||
}
|
||||
// Now also take care of those not listed in `final_block_order`.
|
||||
// This is quadratic, but it's also debug-only code.
|
||||
for bix in 0..self.num_blocks() {
|
||||
if display_order.contains(&bix) {
|
||||
continue;
|
||||
}
|
||||
display_order.push(bix);
|
||||
}
|
||||
|
||||
let mut s = String::new();
|
||||
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
|
||||
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
|
||||
write!(
|
||||
&mut s,
|
||||
" Final block order: {:?}\n",
|
||||
self.final_block_order
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for i in 0..self.num_blocks() {
|
||||
let block = i as BlockIndex;
|
||||
let block = display_order[i];
|
||||
|
||||
write!(&mut s, "Block {}:\n", block).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block) {
|
||||
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
|
||||
{
|
||||
"** OMITTED **"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
|
||||
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
|
||||
}
|
||||
for succ in self.succs(block) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ).unwrap();
|
||||
}
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
let (start, end) = self.block_ranges[block];
|
||||
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
|
||||
for inst in start..end {
|
||||
write!(
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
//! A pass that computes the number of uses of any given instruction.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir::dfg::ValueDef;
|
||||
use crate::ir::Value;
|
||||
use crate::ir::{DataFlowGraph, Function, Inst};
|
||||
|
||||
/// Auxiliary data structure that counts the number of uses of any given
|
||||
/// instruction in a Function. This is used during instruction selection
|
||||
/// to essentially do incremental DCE: when an instruction is no longer
|
||||
/// needed because its computation has been isel'd into another machine
|
||||
/// instruction at every use site, we can skip it.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NumUses {
|
||||
uses: SecondaryMap<Inst, u32>,
|
||||
}
|
||||
|
||||
impl NumUses {
|
||||
fn new() -> NumUses {
|
||||
NumUses {
|
||||
uses: SecondaryMap::with_default(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the NumUses analysis result for a function.
|
||||
pub fn compute(func: &Function) -> NumUses {
|
||||
let mut uses = NumUses::new();
|
||||
for bb in func.layout.blocks() {
|
||||
for inst in func.layout.block_insts(bb) {
|
||||
for arg in func.dfg.inst_args(inst) {
|
||||
let v = func.dfg.resolve_aliases(*arg);
|
||||
uses.add_value(&func.dfg, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
uses
|
||||
}
|
||||
|
||||
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
|
||||
match dfg.value_def(v) {
|
||||
ValueDef::Result(inst, _) => {
|
||||
self.uses[inst] += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Take the complete uses map, consuming this analysis result.
|
||||
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
|
||||
self.uses
|
||||
}
|
||||
}
|
|
@ -1,887 +0,0 @@
|
|||
//! Glue for working with `peepmatic`-generated peephole optimizers.
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{
|
||||
dfg::DataFlowGraph,
|
||||
entities::{Inst, Value},
|
||||
immediates::{Imm64, Uimm64},
|
||||
instructions::{InstructionData, Opcode},
|
||||
types, InstBuilder,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
use cranelift_codegen_shared::condcodes::IntCC;
|
||||
use peepmatic_runtime::{
|
||||
cc::ConditionCode,
|
||||
instruction_set::InstructionSet,
|
||||
operator::Operator,
|
||||
part::{Constant, Part},
|
||||
paths::Path,
|
||||
r#type::{BitWidth, Kind, Type},
|
||||
PeepholeOptimizations, PeepholeOptimizer,
|
||||
};
|
||||
use std::boxed::Box;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::ptr;
|
||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||
|
||||
/// Get the `preopt.peepmatic` peephole optimizer.
|
||||
pub(crate) fn preopt<'a, 'b>(
|
||||
isa: &'b dyn TargetIsa,
|
||||
) -> PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa> {
|
||||
static SERIALIZED: &[u8] = include_bytes!("preopt.serialized");
|
||||
|
||||
// Once initialized, this must never be re-assigned. The initialized value
|
||||
// is semantically "static data" and is intentionally leaked for the whole
|
||||
// program's lifetime.
|
||||
static DESERIALIZED: AtomicPtr<PeepholeOptimizations> = AtomicPtr::new(ptr::null_mut());
|
||||
|
||||
// If `DESERIALIZED` has already been initialized, then just use it.
|
||||
let ptr = DESERIALIZED.load(Ordering::SeqCst);
|
||||
if let Some(peep_opts) = unsafe { ptr.as_ref() } {
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// Otherwise, if `DESERIALIZED` hasn't been initialized, then we need to
|
||||
// deserialize the peephole optimizations and initialize it. However,
|
||||
// another thread could be doing the same thing concurrently, so there is a
|
||||
// race to see who initializes `DESERIALIZED` first, and we need to be
|
||||
// prepared to both win or lose that race.
|
||||
let peep_opts = PeepholeOptimizations::deserialize(SERIALIZED)
|
||||
.expect("should always be able to deserialize `preopt.serialized`");
|
||||
let peep_opts = Box::into_raw(Box::new(peep_opts));
|
||||
|
||||
// Only update `DESERIALIZE` if it is still null, attempting to perform the
|
||||
// one-time transition from null -> non-null.
|
||||
if DESERIALIZED
|
||||
.compare_and_swap(ptr::null_mut(), peep_opts, Ordering::SeqCst)
|
||||
.is_null()
|
||||
{
|
||||
// We won the race to initialize `DESERIALIZED`.
|
||||
debug_assert_eq!(DESERIALIZED.load(Ordering::SeqCst), peep_opts);
|
||||
let peep_opts = unsafe { &*peep_opts };
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// We lost the race to initialize `DESERIALIZED`. Drop our no-longer-needed
|
||||
// instance of `peep_opts` and get the pointer to the instance that won the
|
||||
// race.
|
||||
let _ = unsafe { Box::from_raw(peep_opts) };
|
||||
let peep_opts = DESERIALIZED.load(Ordering::SeqCst);
|
||||
let peep_opts = unsafe { peep_opts.as_ref().unwrap() };
|
||||
peep_opts.optimizer(isa)
|
||||
}
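// A self-contained sketch (hypothetical, not part of the original file) of the same
// "race to initialize" pattern that `preopt` uses above, applied to a plain `u64`
// instead of `PeepholeOptimizations`. It reuses the `ptr`, `Box`, and
// `AtomicPtr`/`Ordering` imports already at the top of this file, and uses
// `compare_exchange` rather than `compare_and_swap`; the winning allocation is
// intentionally leaked so the returned reference can be `'static`. The name
// `_get_cached_sketch` is an assumption made for this example only.
fn _get_cached_sketch() -> &'static u64 {
    static CACHED: AtomicPtr<u64> = AtomicPtr::new(ptr::null_mut());

    // Fast path: another call (possibly on another thread) already initialized it.
    if let Some(existing) = unsafe { CACHED.load(Ordering::SeqCst).as_ref() } {
        return existing;
    }

    // Slow path: build our own candidate and race to publish it.
    let fresh = Box::into_raw(Box::new(42u64)); // stand-in for an expensive one-time computation
    match CACHED.compare_exchange(ptr::null_mut(), fresh, Ordering::SeqCst, Ordering::SeqCst) {
        // We won the race: our allocation is now the shared, leaked value.
        Ok(_) => unsafe { &*fresh },
        // We lost: free our candidate and use the winner's instead.
        Err(winner) => {
            drop(unsafe { Box::from_raw(fresh) });
            unsafe { &*winner }
        }
    }
}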
|
||||
|
||||
/// Either a `Value` or an `Inst`.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum ValueOrInst {
|
||||
Value(Value),
|
||||
Inst(Inst),
|
||||
}
|
||||
|
||||
impl ValueOrInst {
|
||||
/// Get the underlying `Value` if any.
|
||||
pub fn value(&self) -> Option<Value> {
|
||||
match *self {
|
||||
Self::Value(v) => Some(v),
|
||||
Self::Inst(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the underlying `Inst` if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Inst(i) => Some(i),
|
||||
Self::Value(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Value`, panicking if it is not a `Value`.
|
||||
pub fn unwrap_value(&self) -> Value {
|
||||
self.value().unwrap()
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Inst`, panicking if it is not an `Inst`.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().unwrap()
|
||||
}
|
||||
|
||||
/// Is this a `Value`?
|
||||
pub fn is_value(&self) -> bool {
|
||||
self.value().is_some()
|
||||
}
|
||||
|
||||
/// Is this an `Inst`?
|
||||
pub fn is_inst(&self) -> bool {
|
||||
self.inst().is_some()
|
||||
}
|
||||
|
||||
fn resolve_inst(&self, dfg: &DataFlowGraph) -> Option<Inst> {
|
||||
match *self {
|
||||
ValueOrInst::Inst(i) => Some(i),
|
||||
ValueOrInst::Value(v) => dfg.value_def(v).inst(),
|
||||
}
|
||||
}
|
||||
|
||||
fn result_bit_width(&self, dfg: &DataFlowGraph) -> u8 {
|
||||
match *self {
|
||||
ValueOrInst::Value(v) => dfg.value_type(v).bits().try_into().unwrap(),
|
||||
ValueOrInst::Inst(inst) => {
|
||||
let result = dfg.first_result(inst);
|
||||
dfg.value_type(result).bits().try_into().unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_constant(&self, pos: &mut FuncCursor) -> Option<Constant> {
|
||||
let inst = self.resolve_inst(&pos.func.dfg)?;
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
let x: i64 = imm.into();
|
||||
Some(Constant::Int(x as u64, width))
|
||||
}
|
||||
InstructionData::UnaryBool {
|
||||
opcode: Opcode::Bconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
Some(Constant::Bool(imm, width))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Value> for ValueOrInst {
|
||||
fn from(v: Value) -> ValueOrInst {
|
||||
ValueOrInst::Value(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Inst> for ValueOrInst {
|
||||
fn from(i: Inst) -> ValueOrInst {
|
||||
ValueOrInst::Inst(i)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the fixed bit width of `bit_width`, or if it is polymorphic, the bit
|
||||
/// width of `root`.
|
||||
fn bit_width(dfg: &DataFlowGraph, bit_width: BitWidth, root: Inst) -> u8 {
|
||||
bit_width.fixed_width().unwrap_or_else(|| {
|
||||
let tyvar = dfg.ctrl_typevar(root);
|
||||
let ty = dfg.compute_result_type(root, 0, tyvar).unwrap();
|
||||
u8::try_from(ty.bits()).unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert the constant `c` into an instruction.
|
||||
fn const_to_value<'a>(builder: impl InstBuilder<'a>, c: Constant, root: Inst) -> Value {
|
||||
match c {
|
||||
Constant::Bool(b, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
1 => types::B1,
|
||||
8 => types::B8,
|
||||
16 => types::B16,
|
||||
32 => types::B32,
|
||||
64 => types::B64,
|
||||
128 => types::B128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.bconst(ty, b)
|
||||
}
|
||||
Constant::Int(x, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
8 => types::I8,
|
||||
16 => types::I16,
|
||||
32 => types::I32,
|
||||
64 => types::I64,
|
||||
128 => types::I128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.iconst(ty, x as i64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_value(pos: &mut FuncCursor, root: Inst, part: Part<ValueOrInst>) -> Option<Value> {
|
||||
match part {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => {
|
||||
pos.func.dfg.inst_results(inst).first().copied()
|
||||
}
|
||||
Part::Instruction(ValueOrInst::Value(v)) => Some(v),
|
||||
Part::Constant(c) => Some(const_to_value(pos.ins(), c, root)),
|
||||
Part::ConditionCode(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
fn to_peepmatic_operator(&self) -> Option<Operator> {
|
||||
macro_rules! convert {
|
||||
( $( $op:ident $(,)* )* ) => {
|
||||
match self {
|
||||
$( Self::$op => Some(Operator::$op), )*
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
convert!(
|
||||
AdjustSpDown,
|
||||
AdjustSpDownImm,
|
||||
Band,
|
||||
BandImm,
|
||||
Bconst,
|
||||
Bint,
|
||||
Bor,
|
||||
BorImm,
|
||||
Brnz,
|
||||
Brz,
|
||||
Bxor,
|
||||
BxorImm,
|
||||
Iadd,
|
||||
IaddImm,
|
||||
Icmp,
|
||||
IcmpImm,
|
||||
Iconst,
|
||||
Ifcmp,
|
||||
IfcmpImm,
|
||||
Imul,
|
||||
ImulImm,
|
||||
Ireduce,
|
||||
IrsubImm,
|
||||
Ishl,
|
||||
IshlImm,
|
||||
Isub,
|
||||
Rotl,
|
||||
RotlImm,
|
||||
Rotr,
|
||||
RotrImm,
|
||||
Sdiv,
|
||||
SdivImm,
|
||||
Select,
|
||||
Sextend,
|
||||
Srem,
|
||||
SremImm,
|
||||
Sshr,
|
||||
SshrImm,
|
||||
Trapnz,
|
||||
Trapz,
|
||||
Udiv,
|
||||
UdivImm,
|
||||
Uextend,
|
||||
Urem,
|
||||
UremImm,
|
||||
Ushr,
|
||||
UshrImm,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Constant> for Imm64 {
|
||||
type Error = &'static str;
|
||||
|
||||
fn try_from(c: Constant) -> Result<Self, Self::Error> {
|
||||
match c {
|
||||
Constant::Int(x, _) => Ok(Imm64::from(x as i64)),
|
||||
Constant::Bool(..) => Err("cannot create Imm64 from Constant::Bool"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Constant> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: i64 = self.into();
|
||||
Constant::Int(x as _, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_imm64(pos: &mut FuncCursor, part: Part<ValueOrInst>) -> Imm64 {
|
||||
return match part {
|
||||
Part::Instruction(x) => match x.to_constant(pos).unwrap_or_else(|| cannot_convert()) {
|
||||
Constant::Int(x, _) => (x as i64).into(),
|
||||
Constant::Bool(..) => cannot_convert(),
|
||||
},
|
||||
Part::Constant(Constant::Int(x, _)) => (x as i64).into(),
|
||||
Part::ConditionCode(_) | Part::Constant(Constant::Bool(..)) => cannot_convert(),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn cannot_convert() -> ! {
|
||||
panic!("cannot convert part into `Imm64`")
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Constant> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: u64 = self.into();
|
||||
Constant::Int(x, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn peepmatic_to_intcc(cc: ConditionCode) -> IntCC {
|
||||
match cc {
|
||||
ConditionCode::Eq => IntCC::Equal,
|
||||
ConditionCode::Ne => IntCC::NotEqual,
|
||||
ConditionCode::Slt => IntCC::SignedLessThan,
|
||||
ConditionCode::Sle => IntCC::SignedLessThanOrEqual,
|
||||
ConditionCode::Sgt => IntCC::SignedGreaterThan,
|
||||
ConditionCode::Sge => IntCC::SignedGreaterThanOrEqual,
|
||||
ConditionCode::Ult => IntCC::UnsignedLessThan,
|
||||
ConditionCode::Uge => IntCC::UnsignedGreaterThanOrEqual,
|
||||
ConditionCode::Ugt => IntCC::UnsignedGreaterThan,
|
||||
ConditionCode::Ule => IntCC::UnsignedLessThanOrEqual,
|
||||
ConditionCode::Of => IntCC::Overflow,
|
||||
ConditionCode::Nof => IntCC::NotOverflow,
|
||||
}
|
||||
}
|
||||
|
||||
fn intcc_to_peepmatic(cc: IntCC) -> ConditionCode {
|
||||
match cc {
|
||||
IntCC::Equal => ConditionCode::Eq,
|
||||
IntCC::NotEqual => ConditionCode::Ne,
|
||||
IntCC::SignedLessThan => ConditionCode::Slt,
|
||||
IntCC::SignedGreaterThanOrEqual => ConditionCode::Sge,
|
||||
IntCC::SignedGreaterThan => ConditionCode::Sgt,
|
||||
IntCC::SignedLessThanOrEqual => ConditionCode::Sle,
|
||||
IntCC::UnsignedLessThan => ConditionCode::Ult,
|
||||
IntCC::UnsignedGreaterThanOrEqual => ConditionCode::Uge,
|
||||
IntCC::UnsignedGreaterThan => ConditionCode::Ugt,
|
||||
IntCC::UnsignedLessThanOrEqual => ConditionCode::Ule,
|
||||
IntCC::Overflow => ConditionCode::Of,
|
||||
IntCC::NotOverflow => ConditionCode::Nof,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_immediate(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Part<ValueOrInst> {
|
||||
return match dfg[inst] {
|
||||
InstructionData::BinaryImm64 { imm, .. } if i == 0 => imm.into(),
|
||||
InstructionData::BranchIcmp { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::BranchInt { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompare { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { imm, .. } if i == 1 => imm.into(),
|
||||
InstructionData::IntCond { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCondTrap { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntSelect { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::UnaryBool { imm, .. } if i == 0 => {
|
||||
Constant::Bool(imm, BitWidth::Polymorphic).into()
|
||||
}
|
||||
InstructionData::UnaryImm { imm, .. } if i == 0 => imm.into(),
|
||||
ref otherwise => unsupported(otherwise),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn unsupported(data: &InstructionData) -> ! {
|
||||
panic!("unsupported instruction data: {:?}", data)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_argument(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Option<Value> {
|
||||
dfg.inst_args(inst).get(i).copied()
|
||||
}
|
||||
|
||||
fn peepmatic_ty_to_ir_ty(ty: Type, dfg: &DataFlowGraph, root: Inst) -> types::Type {
|
||||
match (ty.kind, bit_width(dfg, ty.bit_width, root)) {
|
||||
(Kind::Int, 8) => types::I8,
|
||||
(Kind::Int, 16) => types::I16,
|
||||
(Kind::Int, 32) => types::I32,
|
||||
(Kind::Int, 64) => types::I64,
|
||||
(Kind::Int, 128) => types::I128,
|
||||
(Kind::Bool, 1) => types::B1,
|
||||
(Kind::Bool, 8) => types::B8,
|
||||
(Kind::Bool, 16) => types::B16,
|
||||
(Kind::Bool, 32) => types::B32,
|
||||
(Kind::Bool, 64) => types::B64,
|
||||
(Kind::Bool, 128) => types::B128,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// NB: the unsafe contract we must uphold here is that our implementation of
|
||||
// `instruction_result_bit_width` must always return a valid, non-zero bit
|
||||
// width.
|
||||
unsafe impl<'a, 'b> InstructionSet<'b> for &'a dyn TargetIsa {
|
||||
type Context = FuncCursor<'b>;
|
||||
|
||||
type Instruction = ValueOrInst;
|
||||
|
||||
fn replace_instruction(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
old: ValueOrInst,
|
||||
new: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("replace {:?} with {:?}", old, new);
|
||||
let old_inst = old.resolve_inst(&pos.func.dfg).unwrap();
|
||||
|
||||
// Try to convert `new` to an instruction, because we prefer replacing
|
||||
// an old instruction with a new one wholesale. However, if the
|
||||
// replacement cannot be converted to an instruction (e.g. the
|
||||
// right-hand side is a block/function parameter value) then we change
|
||||
// the old instruction's result to an alias of the new value.
|
||||
let new_inst = match new {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => Some(inst),
|
||||
Part::Instruction(ValueOrInst::Value(_)) => {
|
||||
// Do not try and follow the value definition. If we transplant
|
||||
// this value's instruction, and there are other uses of this
|
||||
// value, then we could mess up ordering between instructions.
|
||||
None
|
||||
}
|
||||
Part::Constant(c) => {
|
||||
let v = const_to_value(pos.ins(), c, old_inst);
|
||||
let inst = pos.func.dfg.value_def(v).unwrap_inst();
|
||||
Some(inst)
|
||||
}
|
||||
Part::ConditionCode(_) => None,
|
||||
};
|
||||
|
||||
match new_inst {
|
||||
Some(new_inst) => {
|
||||
pos.func.transplant_inst(old_inst, new_inst);
|
||||
debug_assert_eq!(pos.current_inst(), Some(old_inst));
|
||||
old_inst.into()
|
||||
}
|
||||
None => {
|
||||
let new_value = part_to_value(pos, old_inst, new).unwrap();
|
||||
|
||||
let old_results = pos.func.dfg.detach_results(old_inst);
|
||||
let old_results = old_results.as_slice(&pos.func.dfg.value_lists);
|
||||
assert_eq!(old_results.len(), 1);
|
||||
let old_value = old_results[0];
|
||||
|
||||
pos.func.dfg.change_to_alias(old_value, new_value);
|
||||
pos.func.dfg.replace(old_inst).nop();
|
||||
|
||||
new_value.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_part_at_path(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
path: Path,
|
||||
) -> Option<Part<ValueOrInst>> {
|
||||
// The root is path [0].
|
||||
debug_assert!(!path.0.is_empty());
|
||||
debug_assert_eq!(path.0[0], 0);
|
||||
|
||||
let mut part = Part::Instruction(root);
|
||||
for p in path.0[1..].iter().copied() {
|
||||
let inst = part.as_instruction()?.resolve_inst(&pos.func.dfg)?;
|
||||
let operator = pos.func.dfg[inst].opcode().to_peepmatic_operator()?;
|
||||
|
||||
if p < operator.immediates_arity() {
|
||||
part = get_immediate(&pos.func.dfg, inst, p as usize);
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg = p - operator.immediates_arity();
|
||||
let arg = arg as usize;
|
||||
let value = get_argument(&pos.func.dfg, inst, arg)?;
|
||||
part = Part::Instruction(value.into());
|
||||
}
|
||||
|
||||
log::trace!("get_part_at_path({:?}) = {:?}", path, part);
|
||||
Some(part)
|
||||
}
|
||||
|
||||
fn operator(&self, pos: &mut FuncCursor<'b>, value_or_inst: ValueOrInst) -> Option<Operator> {
|
||||
let inst = value_or_inst.resolve_inst(&pos.func.dfg)?;
|
||||
pos.func.dfg[inst].opcode().to_peepmatic_operator()
|
||||
}
|
||||
|
||||
fn make_inst_1(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
r#type: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_1: {:?}({:?})", operator, a);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::AdjustSpDown => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
pos.ins().adjust_sp_down(a).into()
|
||||
}
|
||||
Operator::AdjustSpDownImm => {
|
||||
let c = a.unwrap_constant();
|
||||
let imm = Imm64::try_from(c).unwrap();
|
||||
pos.ins().adjust_sp_down_imm(imm).into()
|
||||
}
|
||||
Operator::Bconst => {
|
||||
let c = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), c, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bint => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().bint(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Brnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: branching instructions must be the root of an
|
||||
// optimization's right-hand side, so we get the destination
|
||||
// block and arguments from the left-hand side's root. Peepmatic
|
||||
// doesn't currently represent labels or varargs.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brnz(a, block, &args).into()
|
||||
}
|
||||
Operator::Brz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// See the comment in the `Operator::Brnz` match arm.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brz(a, block, &args).into()
|
||||
}
|
||||
Operator::Iconst => {
|
||||
let a = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), a, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ireduce => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().ireduce(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().sextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Trapnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: similar to branching instructions (see comment in the
|
||||
// `Operator::Brnz` match arm) trapping instructions must be the
|
||||
// root of an optimization's right-hand side, and we get the
|
||||
// trap code from the root of the left-hand side. Peepmatic
|
||||
// doesn't currently represent trap codes.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
|
||||
pos.ins().trapnz(a, code).into()
|
||||
}
|
||||
Operator::Trapz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
// See comment in the `Operator::Trapnz` match arm.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
pos.ins().trapz(a, code).into()
|
||||
}
|
||||
Operator::Uextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().uextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_2(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_2: {:?}({:?}, {:?})", operator, a, b);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Band => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BandImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bxor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BxorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Iadd => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IaddImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ifcmp => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IfcmpImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Imul => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::ImulImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IrsubImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().irsub_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ishl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IshlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Isub => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().isub(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sdiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Srem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sshr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Udiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Urem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ushr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_3(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
c: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_3: {:?}({:?}, {:?}, {:?})", operator, a, b, c);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Icmp => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp(cond, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IcmpImm => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let imm = part_to_imm64(pos, b);
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp_imm(cond, c, imm);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Select => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().select(a, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn instruction_to_constant(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> Option<Constant> {
|
||||
value_or_inst.to_constant(pos)
|
||||
}
|
||||
|
||||
fn instruction_result_bit_width(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> u8 {
|
||||
value_or_inst.result_bit_width(&pos.func.dfg)
|
||||
}
|
||||
|
||||
fn native_word_size_in_bits(&self, _pos: &mut FuncCursor<'b>) -> u8 {
|
||||
self.pointer_bits()
|
||||
}
|
||||
}
|
|
@ -271,42 +271,6 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
.replace(inst)
|
||||
.sload32_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Store => {
|
||||
pos.func.dfg.replace(inst).store_complex(
|
||||
info.flags,
|
||||
|
@ -341,7 +305,7 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
}
|
||||
_ => panic!("Unsupported load or store opcode"),
|
||||
},
|
||||
InstructionData::BinaryImm64 {
|
||||
InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg,
|
||||
imm,
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
;; Apply basic simplifications.
|
||||
;;
|
||||
;; This folds constants with arithmetic to form `_imm` instructions, and other
|
||||
;; minor simplifications.
|
||||
;;
|
||||
;; Doesn't apply some simplifications if the native word width (in bytes) is
|
||||
;; smaller than the controlling type's width of the instruction. This would
|
||||
;; result in an illegal instruction that would likely be expanded back into an
|
||||
;; instruction on smaller types with the same initial opcode, creating
|
||||
;; unnecessary churn.
|
||||
|
||||
;; Binary instructions whose second argument is constant.
|
||||
(=> (when (iadd $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (sdiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sdiv_imm $C $x))
|
||||
(=> (when (udiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(udiv_imm $C $x))
|
||||
(=> (when (srem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(srem_imm $C $x))
|
||||
(=> (when (urem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(urem_imm $C $x))
|
||||
(=> (when (band $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (rotl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotl_imm $C $x))
|
||||
(=> (when (rotr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotr_imm $C $x))
|
||||
(=> (when (ishl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ishl_imm $C $x))
|
||||
(=> (when (ushr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ushr_imm $C $x))
|
||||
(=> (when (sshr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sshr_imm $C $x))
|
||||
(=> (when (isub $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $(neg $C) $x))
|
||||
(=> (when (ifcmp $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ifcmp_imm $C $x))
|
||||
(=> (when (icmp $cond $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(icmp_imm $cond $C $x))
|
||||
|
||||
;; Binary instructions whose first operand is constant.
|
||||
(=> (when (iadd $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (band $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (isub $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(irsub_imm $C $x))
|
||||
|
||||
;; Unary instructions whose operand is constant.
|
||||
(=> (adjust_sp_down $C) (adjust_sp_down_imm $C))
|
||||
|
||||
;; Fold `(binop_imm $C1 (binop_imm $C2 $x))` into `(binop_imm $(binop $C2 $C1) $x)`.
|
||||
(=> (iadd_imm $C1 (iadd_imm $C2 $x)) (iadd_imm $(iadd $C1 $C2) $x))
|
||||
(=> (imul_imm $C1 (imul_imm $C2 $x)) (imul_imm $(imul $C1 $C2) $x))
|
||||
(=> (bor_imm $C1 (bor_imm $C2 $x)) (bor_imm $(bor $C1 $C2) $x))
|
||||
(=> (band_imm $C1 (band_imm $C2 $x)) (band_imm $(band $C1 $C2) $x))
|
||||
(=> (bxor_imm $C1 (bxor_imm $C2 $x)) (bxor_imm $(bxor $C1 $C2) $x))
|
||||
|
||||
;; Remove operations that are no-ops.
|
||||
(=> (iadd_imm 0 $x) $x)
|
||||
(=> (imul_imm 1 $x) $x)
|
||||
(=> (sdiv_imm 1 $x) $x)
|
||||
(=> (udiv_imm 1 $x) $x)
|
||||
(=> (bor_imm 0 $x) $x)
|
||||
(=> (band_imm -1 $x) $x)
|
||||
(=> (bxor_imm 0 $x) $x)
|
||||
(=> (rotl_imm 0 $x) $x)
|
||||
(=> (rotr_imm 0 $x) $x)
|
||||
(=> (ishl_imm 0 $x) $x)
|
||||
(=> (ushr_imm 0 $x) $x)
|
||||
(=> (sshr_imm 0 $x) $x)
|
||||
|
||||
;; Replace with zero.
|
||||
(=> (imul_imm 0 $x) 0)
|
||||
(=> (band_imm 0 $x) 0)
|
||||
|
||||
;; Replace with negative 1.
|
||||
(=> (bor_imm -1 $x) -1)
|
||||
|
||||
;; Transform `[(x << N) >> N]` into a (un)signed-extending move.
|
||||
;;
|
||||
;; i16 -> i8 -> i16
|
||||
(=> (when (ushr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(uextend{i16} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(sextend{i16} (ireduce{i8} $x)))
|
||||
;; i32 -> i8 -> i32
|
||||
(=> (when (ushr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i8} $x)))
|
||||
;; i32 -> i16 -> i32
|
||||
(=> (when (ushr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i16} $x)))
|
||||
;; i64 -> i8 -> i64
|
||||
(=> (when (ushr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i8} $x)))
|
||||
;; i64 -> i16 -> i64
|
||||
(=> (when (ushr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i16} $x)))
|
||||
;; i64 -> i32 -> i64
|
||||
(=> (when (ushr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i32} $x)))
|
||||
(=> (when (sshr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i32} $x)))
|
||||
|
||||
;; Fold away redundant `bint` instructions that accept both integer and boolean
|
||||
;; arguments.
|
||||
(=> (select (bint $x) $y $z) (select $x $y $z))
|
||||
(=> (brz (bint $x)) (brz $x))
|
||||
(=> (brnz (bint $x)) (brnz $x))
|
||||
(=> (trapz (bint $x)) (trapz $x))
|
||||
(=> (trapnz (bint $x)) (trapnz $x))
|
||||
|
||||
;; Fold comparisons into branch operations when possible.
|
||||
;;
|
||||
;; This matches against operations which compare against zero, then use the
|
||||
;; result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
;; single `brz` or `brnz`.
|
||||
(=> (brnz (icmp_imm ne 0 $x)) (brnz $x))
|
||||
(=> (brz (icmp_imm ne 0 $x)) (brz $x))
|
||||
(=> (brnz (icmp_imm eq 0 $x)) (brz $x))
|
||||
(=> (brz (icmp_imm eq 0 $x)) (brnz $x))
|
||||
|
||||
;; Division and remainder by constants.
|
||||
;;
|
||||
;; TODO: this section is incomplete, and a bunch of related optimizations are
|
||||
;; still hand-coded in `simple_preopt.rs`.
|
||||
|
||||
;; (Division by one is handled above.)
|
||||
|
||||
;; Remainder by one is zero.
|
||||
(=> (urem_imm 1 $x) 0)
|
||||
(=> (srem_imm 1 $x) 0)
|
||||
|
||||
;; Division by a power of two -> shift right.
|
||||
(=> (when (udiv_imm $C $x)
|
||||
(is-power-of-two $C))
|
||||
(ushr_imm $(log2 $C) $x))
|
Binary file not shown.
|
@ -1,393 +0,0 @@
|
|||
//! A Constant-Phi-Node removal pass.
|
||||
|
||||
use log::info;
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::EntityList;
|
||||
use crate::fx::FxHashMap;
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::Function;
|
||||
use crate::ir::{Block, Inst, Value};
|
||||
use crate::timing;
|
||||
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::vec::Vec;
|
||||
|
||||
// A note on notation. For the sake of clarity, this file uses the phrase
|
||||
// "formal parameters" to mean the `Value`s listed in the block head, and
|
||||
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
|
||||
//
|
||||
// block4(v16: i32, v18: i32): <-- formal parameters
|
||||
// ...
|
||||
// brnz v27, block7(v22, v24) <-- actual parameters
|
||||
// jump block6
|
||||
|
||||
// This transformation pass (conceptually) partitions all values in the
|
||||
// function into two groups:
|
||||
//
|
||||
// * Group A: values defined by block formal parameters, except for the entry block.
|
||||
//
|
||||
// * Group B: All other values: that is, values defined by instructions,
|
||||
// and the formals of the entry block.
|
||||
//
|
||||
// For each value in Group A, it attempts to establish whether it will have
|
||||
// the value of exactly one member of Group B. If so, the formal parameter is
|
||||
// deleted, all corresponding actual parameters (in jumps/branches to the
|
||||
// defining block) are deleted, and a rename is inserted.
|
||||
//
|
||||
// The entry block is special-cased because (1) we don't know what values flow
|
||||
// to its formals and (2) in any case we can't change its formals.
|
||||
//
|
||||
// Work proceeds in three phases.
|
||||
//
|
||||
// * Phase 1: examine all instructions. For each block, make up a useful
|
||||
// grab-bag of information, `BlockSummary`, that summarises the block's
|
||||
// formals and jump/branch instruction. This is used by Phases 2 and 3.
|
||||
//
|
||||
// * Phase 2: for each value in Group A, try to find a single Group B value
|
||||
// that flows to it. This is done using a classical iterative forward
|
||||
// dataflow analysis over a simple constant-propagation style lattice. It
|
||||
// converges quickly in practice -- I have seen at most 4 iterations. This
|
||||
// is relatively cheap because the iteration is done over the
|
||||
// `BlockSummary`s, and does not visit each instruction. The resulting
|
||||
// fixed point is stored in a `SolverState`.
|
||||
//
|
||||
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
|
||||
// remove redundant formals and actuals, and to insert suitable renames.
|
||||
//
|
||||
// Note that the effectiveness of the analysis depends on the fact that
|
||||
// there are no copy instructions in Cranelift's IR. If there were, the
|
||||
// computation of `actual_absval` in Phase 2 would have to be extended to
|
||||
// chase through such copies.
|
||||
//
|
||||
// For large functions, the analysis cost using the new AArch64 backend is about
|
||||
// 0.6% of the non-optimising compile time, as measured by instruction counts.
|
||||
// This transformation usually pays for itself several times over, though, by
|
||||
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
|
||||
// seen for large functions.
|
||||
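A hypothetical before/after sketch (not taken from this patch) of the pass's net effect, in the same notation as the comment above. Assume some value v5, defined above both jumps, is the only value ever passed as the first actual parameter of block4:

//   before:                                  after:
//     block2: ... jump block4(v5, v20)         block2: ... jump block4(v20)
//     block3: ... jump block4(v5, v21)         block3: ... jump block4(v21)
//     block4(v16: i32, v18: i32): ...          block4(v18: i32): ...
//
//   The redundant formal v16 is removed, the matching actuals are dropped from
//   each jump/branch, and v16 becomes an alias of v5.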
|
||||
// The `Value`s (Group B) that can flow to a formal parameter (Group A).
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
enum AbstractValue {
|
||||
// Two or more values flow to this formal.
|
||||
Many,
|
||||
// Exactly one value, as stated, flows to this formal. The `Value`s that
|
||||
// can appear here are exactly: `Value`s defined by `Inst`s, plus the
|
||||
// `Value`s defined by the formals of the entry block. Note that this is
|
||||
// exactly the set of `Value`s that are *not* tracked in the solver below
|
||||
// (see `SolverState`).
|
||||
One(Value /*Group B*/),
|
||||
// No value flows to this formal.
|
||||
None,
|
||||
}
|
||||
|
||||
impl AbstractValue {
|
||||
fn join(self, other: AbstractValue) -> AbstractValue {
|
||||
match (self, other) {
|
||||
// Joining with `None` has no effect
|
||||
(AbstractValue::None, p2) => p2,
|
||||
(p1, AbstractValue::None) => p1,
|
||||
// Joining with `Many` produces `Many`
|
||||
(AbstractValue::Many, _p2) => AbstractValue::Many,
|
||||
(_p1, AbstractValue::Many) => AbstractValue::Many,
|
||||
// The only interesting case
|
||||
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
|
||||
if v1 == v2 {
|
||||
AbstractValue::One(v1)
|
||||
} else {
|
||||
AbstractValue::Many
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn is_one(self) -> bool {
|
||||
if let AbstractValue::One(_) = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
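A quick illustration of the lattice implemented by `join` above, using hypothetical values with v1 != v2:

// None.join(One(v1))    == One(v1)   // None is the identity
// One(v1).join(One(v1)) == One(v1)   // agreeing values stay One
// One(v1).join(One(v2)) == Many      // disagreeing values collapse to Many
// Many.join(anything)   == Many      // Many absorbs everything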
|
||||
// For some block, a useful bundle of info. The `Block` itself is not stored
|
||||
// here since it will be the key in the associated `FxHashMap` -- see
|
||||
// `summaries` below. For the `SmallVec` tuning params: most blocks have
|
||||
// few parameters, hence `4`. And almost all blocks have either one or two
|
||||
// successors, hence `2`.
|
||||
#[derive(Debug)]
|
||||
struct BlockSummary {
|
||||
// Formal parameters for this `Block`
|
||||
formals: SmallVec<[Value; 4] /*Group A*/>,
|
||||
// For each `Inst` in this block that transfers to another block: the
|
||||
// `Inst` itself, the destination `Block`, and the actual parameters
|
||||
// passed. We don't bother to include transfers that pass zero parameters
|
||||
// since that makes more work for the solver for no purpose.
|
||||
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
|
||||
}
|
||||
impl BlockSummary {
|
||||
fn new(formals: SmallVec<[Value; 4]>) -> Self {
|
||||
Self {
|
||||
formals,
|
||||
dests: smallvec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Solver state. This holds an AbstractValue for each formal parameter, except
|
||||
// for those from the entry block.
|
||||
struct SolverState {
|
||||
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
|
||||
}
|
||||
impl SolverState {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
absvals: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
fn get(&self, actual: Value) -> AbstractValue {
|
||||
match self.absvals.get(&actual) {
|
||||
Some(lp) => *lp,
|
||||
None => panic!("SolverState::get: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
|
||||
self.absvals.get(&actual)
|
||||
}
|
||||
fn set(&mut self, actual: Value, lp: AbstractValue) {
|
||||
match self.absvals.insert(actual, lp) {
|
||||
Some(_old_lp) => {}
|
||||
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect phis in `func` that will only ever produce one value, using a
|
||||
/// classic forward dataflow analysis. Then remove them.
|
||||
#[inline(never)]
|
||||
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
|
||||
let _tt = timing::remove_constant_phis();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
// Get the blocks, in reverse postorder
|
||||
let mut blocks_reverse_postorder = Vec::<Block>::new();
|
||||
for block in domtree.cfg_postorder() {
|
||||
blocks_reverse_postorder.push(*block);
|
||||
}
|
||||
blocks_reverse_postorder.reverse();
|
||||
|
||||
// Phase 1 of 3: for each block, make a summary containing all relevant
|
||||
// info. The solver will iterate over the summaries, rather than having
|
||||
// to inspect each instruction in each block.
|
||||
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
let formals = func.dfg.block_params(*b);
|
||||
let mut summary = BlockSummary::new(SmallVec::from(formals));
|
||||
|
||||
for inst in func.layout.block_insts(*b) {
|
||||
let idetails = &func.dfg[inst];
|
||||
// Note that multi-dest transfers (i.e., branch tables) don't
|
||||
// carry parameters in our IR, so we only have to care about
|
||||
// `SingleDest` here.
|
||||
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
|
||||
{
|
||||
let inst_var_args = func.dfg.inst_variable_args(inst);
|
||||
// Skip branches/jumps that carry no params.
|
||||
if inst_var_args.len() > 0 {
|
||||
let mut actuals = SmallVec::<[Value; 4]>::new();
|
||||
for arg in inst_var_args {
|
||||
let arg = func.dfg.resolve_aliases(*arg);
|
||||
actuals.push(arg);
|
||||
}
|
||||
summary.dests.push((inst, dest, actuals));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the invariant that all blocks (except for the entry) appear
|
||||
// in the summary, *unless* they have neither formals nor any
|
||||
// param-carrying branches/jumps.
|
||||
if formals.len() > 0 || summary.dests.len() > 0 {
|
||||
summaries.insert(*b, summary);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2 of 3: iterate over the summaries in reverse postorder,
|
||||
// computing new `AbstractValue`s for each tracked `Value`. The set of
|
||||
// tracked `Value`s is exactly Group A as described above.
|
||||
|
||||
let entry_block = func
|
||||
.layout
|
||||
.entry_block()
|
||||
.expect("remove_constant_phis: entry block unknown");
|
||||
|
||||
// Set up initial solver state
|
||||
let mut state = SolverState::new();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
// For each block, get the formals
|
||||
if *b == entry_block {
|
||||
continue;
|
||||
}
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
|
||||
assert!(mb_old_absval.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
// Solve: repeatedly traverse the blocks in reverse postorder, until there
|
||||
// are no changes.
|
||||
let mut iter_no = 0;
|
||||
loop {
|
||||
iter_no += 1;
|
||||
let mut changed = false;
|
||||
|
||||
for src in &blocks_reverse_postorder {
|
||||
let mb_src_summary = summaries.get(src);
|
||||
// The src block might have no summary. This means it has no
|
||||
// branches/jumps that carry parameters *and* it doesn't take any
|
||||
// parameters itself. Phase 1 ensures this. So we can ignore it.
|
||||
if mb_src_summary.is_none() {
|
||||
continue;
|
||||
}
|
||||
let src_summary = mb_src_summary.unwrap();
|
||||
for (_inst, dst, src_actuals) in &src_summary.dests {
|
||||
assert!(*dst != entry_block);
|
||||
// By contrast, the dst block must have a summary. Phase 1
|
||||
// will have only included an entry in `src_summary.dests` if
|
||||
// that branch/jump carried at least one parameter. So the
|
||||
// dst block does take parameters, so it must have a summary.
|
||||
let dst_summary = summaries
|
||||
.get(dst)
|
||||
.expect("remove_constant_phis: dst block has no summary");
|
||||
let dst_formals = &dst_summary.formals;
|
||||
assert!(src_actuals.len() == dst_formals.len());
|
||||
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
|
||||
// Find the abstract value for `actual`. If it is a block
|
||||
// formal parameter then the most recent abstract value is
|
||||
// to be found in the solver state. If not, then it's a
|
||||
// real value defining point (not a phi), in which case
|
||||
// return it itself.
|
||||
let actual_absval = match state.maybe_get(*actual) {
|
||||
Some(pt) => *pt,
|
||||
None => AbstractValue::One(*actual),
|
||||
};
|
||||
|
||||
// And `join` the new value with the old.
|
||||
let formal_absval_old = state.get(*formal);
|
||||
let formal_absval_new = formal_absval_old.join(actual_absval);
|
||||
if formal_absval_new != formal_absval_old {
|
||||
changed = true;
|
||||
state.set(*formal, formal_absval_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !changed {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let mut n_consts = 0;
|
||||
for absval in state.absvals.values() {
|
||||
if absval.is_one() {
|
||||
n_consts += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3 of 3: edit the function to remove constant formals, using the
|
||||
// summaries and the final solver state as a guide.
|
||||
|
||||
// Make up a set of blocks that need editing.
|
||||
let mut need_editing = FxHashSet::<Block>::default();
|
||||
for (block, summary) in &summaries {
|
||||
if *block == entry_block {
|
||||
continue;
|
||||
}
|
||||
for formal in &summary.formals {
|
||||
let formal_absval = state.get(*formal);
|
||||
if formal_absval.is_one() {
|
||||
need_editing.insert(*block);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Firstly, deal with the formals. For each formal which is redundant,
|
||||
// remove it, and also add a reroute from it to the constant value which
|
||||
// we know it to be.
|
||||
for b in &need_editing {
|
||||
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
// The state must give an absval for `formal`.
|
||||
if let AbstractValue::One(replacement_val) = state.get(*formal) {
|
||||
del_these.push((*formal, replacement_val));
|
||||
}
|
||||
}
|
||||
// We can delete the formals in any order. However,
|
||||
// `remove_block_param` works by sliding backwards all arguments to
|
||||
// the right of the one it is asked to delete. Hence when removing more
|
||||
// than one formal, it is significantly more efficient to ask it to
|
||||
// remove the rightmost formal first, and hence this `reverse`.
|
||||
del_these.reverse();
|
||||
for (redundant_formal, replacement_val) in del_these {
|
||||
func.dfg.remove_block_param(redundant_formal);
|
||||
func.dfg.change_to_alias(redundant_formal, replacement_val);
|
||||
}
|
||||
}
|
||||
|
||||
// Secondly, visit all branch insns. If the destination has had its
|
||||
// formals changed, change the actuals accordingly. Don't scan all insns,
|
||||
// rather just visit those as listed in the summaries we prepared earlier.
|
||||
for (_src_block, summary) in &summaries {
|
||||
for (inst, dst_block, _src_actuals) in &summary.dests {
|
||||
if !need_editing.contains(dst_block) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
|
||||
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
|
||||
let num_fixed_actuals = func.dfg[*inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let dst_summary = summaries.get(&dst_block).unwrap();
|
||||
|
||||
// Check that the numbers of arguments make sense.
|
||||
assert!(num_fixed_actuals <= num_old_actuals);
|
||||
assert!(num_fixed_actuals + dst_summary.formals.len() == num_old_actuals);
|
||||
|
||||
// Create a new value list.
|
||||
let mut new_actuals = EntityList::<Value>::new();
|
||||
// Copy the fixed args to the new list
|
||||
for i in 0..num_fixed_actuals {
|
||||
let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
|
||||
new_actuals.push(val, &mut func.dfg.value_lists);
|
||||
}
|
||||
|
||||
// Copy the variable args (the actual block params) to the new
|
||||
// list, filtering out redundant ones.
|
||||
for i in 0..dst_summary.formals.len() {
|
||||
let actual_i = old_actuals
|
||||
.get(num_fixed_actuals + i, &func.dfg.value_lists)
|
||||
.unwrap();
|
||||
let formal_i = dst_summary.formals[i];
|
||||
let is_redundant = state.get(formal_i).is_one();
|
||||
if !is_redundant {
|
||||
new_actuals.push(actual_i, &mut func.dfg.value_lists);
|
||||
}
|
||||
}
|
||||
func.dfg[*inst].put_value_list(new_actuals);
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
|
||||
iter_no,
|
||||
state.absvals.len(),
|
||||
n_consts
|
||||
);
|
||||
}
|
|
@ -10,8 +10,10 @@ use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
|||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, IntCC},
|
||||
instructions::Opcode,
|
||||
types::{I32, I64},
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{I16, I32, I64, I8},
|
||||
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
|
@ -142,7 +144,7 @@ fn package_up_divrem_info(
|
|||
/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
|
||||
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
|
||||
if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
|
||||
let (is_signed, is_rem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
|
@ -466,6 +468,340 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
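A worked instance of the fold above, written as schematic Cranelift IR with hypothetical value names. With a 32-bit controlling type and a shift amount of 24, the reduce width is 32 - 24 = 8:

// v1 = ishl_imm v0, 24
// v2 = ushr_imm v1, 24
//          ==>
// v3 = ireduce.i8  v0
// v2 = uextend.i32 v3      // an sshr_imm pair would instead be rewritten to sextend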
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode && ty == pos.func.dfg.ctrl_typevar(arg_inst) {
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
|
||||
match (opcode, imm.into()) {
|
||||
(Opcode::IaddImm, 0)
|
||||
| (Opcode::ImulImm, 1)
|
||||
| (Opcode::SdivImm, 1)
|
||||
| (Opcode::UdivImm, 1)
|
||||
| (Opcode::BorImm, 0)
|
||||
| (Opcode::BandImm, -1)
|
||||
| (Opcode::BxorImm, 0)
|
||||
| (Opcode::RotlImm, 0)
|
||||
| (Opcode::RotrImm, 0)
|
||||
| (Opcode::IshlImm, 0)
|
||||
| (Opcode::UshrImm, 0)
|
||||
| (Opcode::SshrImm, 0) => {
|
||||
// Alias the result value with the original argument.
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
|
||||
}
|
||||
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
|
||||
// Replace by zero.
|
||||
pos.func.dfg.replace(inst).iconst(ty, 0);
|
||||
}
|
||||
(Opcode::BorImm, -1) => {
|
||||
// Replace by minus one.
|
||||
pos.func.dfg.replace(inst).iconst(ty, -1);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::IntCompare { opcode, cond, args } => {
|
||||
debug_assert_eq!(opcode, Opcode::Icmp);
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
|
||||
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::CondTrap { .. }
|
||||
| InstructionData::Branch { .. }
|
||||
| InstructionData::Ternary {
|
||||
opcode: Opcode::Select,
|
||||
..
|
||||
} => {
|
||||
// Fold away a redundant `bint`.
|
||||
let condition_def = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
pos.func.dfg.value_def(args[0])
|
||||
};
|
||||
if let ValueDef::Result(def_inst, _) = condition_def {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Bint,
|
||||
arg: bool_val,
|
||||
} = pos.func.dfg[def_inst]
|
||||
{
|
||||
let args = pos.func.dfg.inst_args_mut(inst);
|
||||
args[0] = bool_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
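A schematic sketch of what the `Binary` arm of `simplify` above does (hypothetical values; the same rewrites appear in the rule list earlier in this patch):

// v1 = iconst.i32 7
// v2 = iadd v0, v1      ==>   v2 = iadd_imm v0, 7
// v3 = isub v0, v1      ==>   v3 = iadd_imm v0, -7   // the immediate is negated
//
// The recursive call then lets the BinaryImm arm clean up no-ops, e.g.
// iadd_imm v, 0 becomes an alias of v.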
|
||||
struct BranchOptInfo {
|
||||
br_inst: Inst,
|
||||
cmp_arg: Value,
|
||||
args: ValueList,
|
||||
new_opcode: Opcode,
|
||||
}
|
||||
|
||||
/// Fold comparisons into branch operations when possible.
|
||||
///
|
||||
/// This matches against operations which compare against zero, then use the
|
||||
/// result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
/// single `brz` or `brnz`.
|
||||
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
|
||||
let mut info = if let InstructionData::Branch {
|
||||
opcode: br_opcode,
|
||||
args: ref br_args,
|
||||
..
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let first_arg = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
args[0]
|
||||
};
|
||||
|
||||
let icmp_inst = if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
icmp_inst
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let InstructionData::IntCompareImm {
|
||||
opcode: Opcode::IcmpImm,
|
||||
arg: cmp_arg,
|
||||
cond: cmp_cond,
|
||||
imm: cmp_imm,
|
||||
} = pos.func.dfg[icmp_inst]
|
||||
{
|
||||
let cmp_imm: i64 = cmp_imm.into();
|
||||
if cmp_imm != 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// icmp_imm returns non-zero when the comparison is true. So, if
|
||||
// we're branching on zero, we need to invert the condition.
|
||||
let cond = match br_opcode {
|
||||
Opcode::Brz => cmp_cond.inverse(),
|
||||
Opcode::Brnz => cmp_cond,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let new_opcode = match cond {
|
||||
IntCC::Equal => Opcode::Brz,
|
||||
IntCC::NotEqual => Opcode::Brnz,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
BranchOptInfo {
|
||||
br_inst: inst,
|
||||
cmp_arg,
|
||||
args: br_args.clone(),
|
||||
new_opcode,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
|
||||
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
|
||||
*opcode = info.new_opcode;
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
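A schematic sketch of the rewrite performed by `branch_opt` above (hypothetical values), matching the branch rules listed earlier in this patch:

// v1 = icmp_imm ne v0, 0
// brnz v1, block2(...)      ==>   brnz v0, block2(...)
//
// v1 = icmp_imm eq v0, 0
// brnz v1, block2(...)      ==>   brz  v0, block2(...)   // condition inverted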
|
||||
enum BranchOrderKind {
|
||||
BrzToBrnz(Value),
|
||||
BrnzToBrz(Value),
|
||||
|
@ -608,490 +944,15 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block,
|
|||
cfg.recompute_block(pos.func, block);
|
||||
}
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::peepmatic::ValueOrInst;
|
||||
|
||||
pub type PeepholeOptimizer<'a, 'b> =
|
||||
peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
crate::peepmatic::preopt(isa)
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
_native_word_width: u32,
|
||||
) {
|
||||
// After we apply one optimization, that might make another
|
||||
// optimization applicable. Keep running the peephole optimizer
|
||||
// until either:
|
||||
//
|
||||
// * No optimization applied, and therefore it doesn't make sense to
|
||||
// try again, because no optimization will apply again.
|
||||
//
|
||||
// * Or when we replaced an instruction with an alias to an existing
|
||||
// value, because we already ran the peephole optimizer over the
|
||||
// aliased value's instruction in an early part of the traversal
|
||||
// over the function.
|
||||
while let Some(ValueOrInst::Inst(new_inst)) =
|
||||
optimizer.apply_one(pos, ValueOrInst::Inst(inst))
|
||||
{
|
||||
// We transplanted a new instruction into the current
|
||||
// instruction, so the "new" instruction is actually the same
|
||||
// one, just with different data.
|
||||
debug_assert_eq!(new_inst, inst);
|
||||
}
|
||||
debug_assert_eq!(pos.current_inst(), Some(inst));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "enable-peepmatic"))]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::ir::{
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{B8, I16, I32, I8},
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct PeepholeOptimizer<'a, 'b> {
|
||||
phantom: PhantomData<(&'a (), &'b ())>,
|
||||
}
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
PeepholeOptimizer {
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
_optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
native_word_width: u32,
|
||||
) {
|
||||
simplify(pos, inst, native_word_width);
|
||||
branch_opt(pos, inst);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm64 { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode
|
||||
&& ty == pos.func.dfg.ctrl_typevar(arg_inst)
|
||||
{
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
            match (opcode, imm.into()) {
                (Opcode::IaddImm, 0)
                | (Opcode::ImulImm, 1)
                | (Opcode::SdivImm, 1)
                | (Opcode::UdivImm, 1)
                | (Opcode::BorImm, 0)
                | (Opcode::BandImm, -1)
                | (Opcode::BxorImm, 0)
                | (Opcode::RotlImm, 0)
                | (Opcode::RotrImm, 0)
                | (Opcode::IshlImm, 0)
                | (Opcode::UshrImm, 0)
                | (Opcode::SshrImm, 0) => {
                    // Alias the result value with the original argument.
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
                }
                (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
                    // Replace by zero.
                    pos.func.dfg.replace(inst).iconst(ty, 0);
                }
                (Opcode::BorImm, -1) => {
                    // Replace by minus one.
                    pos.func.dfg.replace(inst).iconst(ty, -1);
                }
                _ => {}
            }
        }

        InstructionData::IntCompare { opcode, cond, args } => {
            debug_assert_eq!(opcode, Opcode::Icmp);
            if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                }
            }
        }

        InstructionData::CondTrap { .. }
        | InstructionData::Branch { .. }
        | InstructionData::Ternary {
            opcode: Opcode::Select,
            ..
        } => {
            // Fold away a redundant `bint`.
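            // These consumers accept a boolean condition directly, so when the condition was
            // produced by `bint` (bool -> int) we can branch/trap/select on the original boolean.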
            let condition_def = {
                let args = pos.func.dfg.inst_args(inst);
                pos.func.dfg.value_def(args[0])
            };
            if let ValueDef::Result(def_inst, _) = condition_def {
                if let InstructionData::Unary {
                    opcode: Opcode::Bint,
                    arg: bool_val,
                } = pos.func.dfg[def_inst]
                {
                    let args = pos.func.dfg.inst_args_mut(inst);
                    args[0] = bool_val;
                }
            }
        }

        InstructionData::Ternary {
            opcode: Opcode::Bitselect,
            args,
        } => {
            let old_cond_type = pos.func.dfg.value_type(args[0]);
            if !old_cond_type.is_vector() {
                return;
            }

            // Replace bitselect with vselect if each lane of controlling mask is either
            // all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
            // while vselect can be encoded using single BLEND instruction.
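            // Concretely, `bitselect(c, x, y)` takes bits from `x` where `c` has ones and
            // from `y` where it has zeroes, so a mask whose lanes are all-ones or all-zeroes
            // behaves exactly like a per-lane `vselect`.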
            if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
                    InstructionData::Unary {
                        opcode: Opcode::RawBitcast,
                        arg,
                    } => {
                        // If controlling mask is raw-bitcasted boolean vector then
                        // we know each lane is either all zeroes or ones,
                        // so we can use vselect instruction instead.
                        let arg_type = pos.func.dfg.value_type(arg);
                        if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
                            return;
                        }
                        (arg, arg_type)
                    }
                    InstructionData::UnaryConst {
                        opcode: Opcode::Vconst,
                        constant_handle,
                    } => {
                        // If each byte of controlling mask is 0x00 or 0xFF then
                        // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
                        // Bitselect operates at bit level, so the lane types don't matter.
                        let const_data = pos.func.dfg.constants.get(constant_handle);
                        if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
                            return;
                        }
                        let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
                        (pos.ins().raw_bitcast(new_type, args[0]), new_type)
                    }
                    _ => return,
                };

                let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
                let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
                let old_arg_type = pos.func.dfg.value_type(args[1]);

                if arg_type != old_arg_type {
                    // Operands types must match, we need to add bitcasts.
                    let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
                    let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
                    let ret = pos.ins().vselect(cond_val, arg1, arg2);
                    pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
                } else {
                    pos.func
                        .dfg
                        .replace(inst)
                        .vselect(cond_val, args[1], args[2]);
                }
            }
        }

        _ => {}
    }
}

struct BranchOptInfo {
    br_inst: Inst,
    cmp_arg: Value,
    args: ValueList,
    new_opcode: Opcode,
}

/// Fold comparisons into branch operations when possible.
///
/// This matches against operations which compare against zero, then use the
/// result in a `brz` or `brnz` branch. It folds those two operations into a
/// single `brz` or `brnz`.
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
    let mut info = if let InstructionData::Branch {
        opcode: br_opcode,
        args: ref br_args,
        ..
    } = pos.func.dfg[inst]
    {
        let first_arg = {
            let args = pos.func.dfg.inst_args(inst);
            args[0]
        };

        let icmp_inst =
            if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
                icmp_inst
            } else {
                return;
            };

        if let InstructionData::IntCompareImm {
            opcode: Opcode::IcmpImm,
            arg: cmp_arg,
            cond: cmp_cond,
            imm: cmp_imm,
        } = pos.func.dfg[icmp_inst]
        {
            let cmp_imm: i64 = cmp_imm.into();
            if cmp_imm != 0 {
                return;
            }

            // icmp_imm returns non-zero when the comparison is true. So, if
            // we're branching on zero, we need to invert the condition.
            let cond = match br_opcode {
                Opcode::Brz => cmp_cond.inverse(),
                Opcode::Brnz => cmp_cond,
                _ => return,
            };

            let new_opcode = match cond {
                IntCC::Equal => Opcode::Brz,
                IntCC::NotEqual => Opcode::Brnz,
                _ => return,
            };

            BranchOptInfo {
                br_inst: inst,
                cmp_arg,
                args: br_args.clone(),
                new_opcode,
            }
        } else {
            return;
        }
    } else {
        return;
    };

    info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
    if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
        *opcode = info.new_opcode;
    } else {
        panic!();
    }
}
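
A plain-Rust sanity check of the fold above (an illustration only, not part of this patch; the function names are made up): `brz` is taken when its condition value is zero, so `brz` of `v1 = icmp_imm eq x, 0` fires exactly when `x` is non-zero, which is what the rewritten `brnz x` tests.

    // Illustration only: mirrors the brz/icmp_imm fold in plain Rust.
    fn original_branch_taken(x: i64) -> bool {
        let v1 = (x == 0) as i64; // v1 = icmp_imm eq x, 0
        v1 == 0 // brz v1 -- taken when v1 is zero
    }

    fn folded_branch_taken(x: i64) -> bool {
        x != 0 // brnz x
    }

    #[test]
    fn branch_fold_is_equivalent() {
        for x in [-3i64, 0, 1, 42] {
            assert_eq!(original_branch_taken(x), folded_branch_taken(x));
        }
    }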

/// The main pre-opt pass.
pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
    let _tt = timing::preopt();

    let mut pos = FuncCursor::new(func);
    let native_word_width = isa.pointer_bytes() as u32;
    let mut optimizer = simplify::peephole_optimizer(isa);

    let native_word_width = isa.pointer_bytes();
    while let Some(block) = pos.next_block() {
        while let Some(inst) = pos.next_inst() {
            simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
            // Apply basic simplifications.
            simplify(&mut pos, inst, native_word_width as u32);

            // Try to transform divide-by-constant into simpler operations.
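            // For instance, an unsigned divide by a power of two can be rewritten as a
            // single `ushr_imm`; other constant divisors typically become a
            // multiply-by-magic-constant sequence rather than a hardware divide.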
            if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {

@ -1099,6 +960,7 @@ pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn Targ
                continue;
            }

            branch_opt(&mut pos, inst);
            branch_order(&mut pos, cfg, block, inst);
        }
    }

@ -62,7 +62,6 @@ define_passes! {
    gvn: "Global value numbering",
    licm: "Loop invariant code motion",
    unreachable_code: "Remove unreachable blocks",
    remove_constant_phis: "Remove constant phi-nodes",

    regalloc: "Register allocation",
    ra_liveness: "RA liveness analysis",

@ -18,9 +18,9 @@ use serde::{Deserialize, Serialize};
pub struct ValueLocRange {
    /// The ValueLoc containing a ValueLabel during this range.
    pub loc: ValueLoc,
    /// The start of the range. It is an offset in the generated code.
    /// The start of the range.
    pub start: u32,
    /// The end of the range. It is an offset in the generated code.
    /// The end of the range.
    pub end: u32,
}

@ -91,11 +91,6 @@ pub fn build_value_labels_ranges<T>(
where
    T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
{
    // FIXME(#1523): New-style backend does not yet have debug info.
    if isa.get_mach_backend().is_some() {
        return HashMap::new();
    }

    let values_labels = build_value_labels_index::<T>(func);

    let mut blocks = func.layout.blocks().collect::<Vec<_>>();

@ -756,10 +756,10 @@ impl<'a> Verifier<'a> {
            | UnaryIeee64 { .. }
            | UnaryBool { .. }
            | Binary { .. }
            | BinaryImm8 { .. }
            | BinaryImm64 { .. }
            | BinaryImm { .. }
            | Ternary { .. }
            | TernaryImm8 { .. }
            | InsertLane { .. }
            | ExtractLane { .. }
            | Shuffle { .. }
            | IntCompare { .. }
            | IntCompareImm { .. }

@ -1912,20 +1912,20 @@ impl<'a> Verifier<'a> {
                    Ok(())
                }
            }
            ir::InstructionData::BinaryImm8 {
            ir::InstructionData::ExtractLane {
                opcode: ir::instructions::Opcode::Extractlane,
                imm: lane,
                lane,
                arg,
                ..
            }
            | ir::InstructionData::TernaryImm8 {
            | ir::InstructionData::InsertLane {
                opcode: ir::instructions::Opcode::Insertlane,
                imm: lane,
                lane,
                args: [arg, _],
                ..
            } => {
                // We must be specific about the opcodes above because other instructions are using
                // the same formats.
                // the ExtractLane/InsertLane formats.
                let ty = self.func.dfg.value_type(arg);
                if u16::from(lane) >= ty.lane_count() {
                    errors.fatal((

@ -508,8 +508,7 @@ pub fn write_operands(
            constant_handle, ..
        } => write!(w, " {}", constant_handle),
        Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]),
        BinaryImm8 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        BinaryImm64 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]),
        MultiAry { ref args, .. } => {
            if args.is_empty() {

@ -519,7 +518,8 @@ pub fn write_operands(
            }
        }
        NullAry { .. } => write!(w, " "),
        TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm),
        InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
        ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
        Shuffle { mask, args, .. } => {
            let data = dfg.immediates.get(mask).expect(
                "Expected the shuffle mask to already be inserted into the immediates table",

@ -1 +1 @@
{"files":{"Cargo.toml":"c4ee5d42f3f76a1458ec0d97b5777569906819fe5b4002512de0e69814754c53","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"cd1dd7e4040349ff8e5e88cbc3273c2b52cb411853933de6aea8976a1a99445f","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"f6d738a46f1dca8b0c82a5910d86cd572a3585ab7ef9f73dac96962529069190","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"dccb3dd6fc87eba0101de56417f21cab67a4394831df9fa41e3bbddb70cdf694","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.64.0"
version = "0.63.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

@ -85,10 +85,6 @@ macro_rules! entity_impl {
            fn reserved_value() -> $entity {
                $entity($crate::__core::u32::MAX)
            }

            fn is_reserved_value(&self) -> bool {
                self.0 == $crate::__core::u32::MAX
            }
        }

        impl $entity {

@ -11,11 +11,9 @@ use core::fmt;
use core::mem;

/// Types that have a reserved value which can't be created any other way.
pub trait ReservedValue {
pub trait ReservedValue: Eq {
    /// Create an instance of the reserved value.
    fn reserved_value() -> Self;
    /// Checks whether value is the reserved one.
    fn is_reserved_value(&self) -> bool;
}
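
For context, a standalone sketch (illustrative only, not part of this diff; the `Slot` type is made up) of the sentinel idea both versions of the trait encode: `None` is represented by the type's reserved value, and the surrounding hunks merely switch between calling `is_reserved_value()` and comparing against `reserved_value()` under the restored `Eq` bound.

    // Sketch of a ReservedValue-style sentinel type; names are illustrative.
    #[derive(Debug, PartialEq, Eq, Clone, Copy)]
    struct Slot(u32);

    impl Slot {
        const RESERVED: u32 = u32::MAX;

        fn reserved_value() -> Self {
            Slot(Self::RESERVED)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == Self::RESERVED
        }
    }

    fn main() {
        let s = Slot(7);
        // For any well-behaved implementation the two checks agree.
        assert_eq!(s.is_reserved_value(), s == Slot::reserved_value());
    }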

/// Packed representation of `Option<T>`.

@ -25,12 +23,12 @@ pub struct PackedOption<T: ReservedValue>(T);
impl<T: ReservedValue> PackedOption<T> {
    /// Returns `true` if the packed option is a `None` value.
    pub fn is_none(&self) -> bool {
        self.0.is_reserved_value()
        self.0 == T::reserved_value()
    }

    /// Returns `true` if the packed option is a `Some` value.
    pub fn is_some(&self) -> bool {
        !self.0.is_reserved_value()
        self.0 != T::reserved_value()
    }

    /// Expand the packed option into a normal `Option`.

@ -77,7 +75,7 @@ impl<T: ReservedValue> From<T> for PackedOption<T> {
    /// Convert `t` into a packed `Some(x)`.
    fn from(t: T) -> Self {
        debug_assert!(
            !t.is_reserved_value(),
            t != T::reserved_value(),
            "Can't make a PackedOption from the reserved value."
        );
        Self(t)

@ -125,10 +123,6 @@ mod tests {
        fn reserved_value() -> Self {
            NoC(13)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == 13
        }
    }

    #[test]

@ -151,10 +145,6 @@ mod tests {
        fn reserved_value() -> Self {
            Ent(13)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == 13
        }
    }

    #[test]

@ -1 +1 @@
{"files":{"Cargo.toml":"084cc46ba2d09a2ee8085c37be8624b3cc249d381f1cbee6df468930ce15e415","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"d1d8477572f70cc28f71424af272d9eec0adf58af657ff153c4acbbb39822a50","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"d152c6553c0091b43d9ea0cd547dc49440e6321eb792bf47fdd3245aed046513","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"f750cc995c66635dab7f2b977266cf9235d984b585ab8145bdb858ea8e1b0fb4","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}