Bug 1706427 - Update wasmparser and cranelift. r=rhunt

Differential Revision: https://phabricator.services.mozilla.com/D112806
Yury Delendik 2021-05-04 14:59:40 +00:00
Parent cec8f0915a
Commit 50fe06fce0
286 changed files with 20338 additions and 19747 deletions

View file

@ -47,21 +47,6 @@ git = "https://github.com/mozilla/application-services"
replace-with = "vendored-sources"
rev = "8a576fbe79199fa8664f64285524017f74ebcc5f"
[source."https://github.com/mozilla-spidermonkey/wasmtime"]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
replace-with = "vendored-sources"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
[source."https://github.com/mozilla-spidermonkey/wasm-tools"]
git = "https://github.com/mozilla-spidermonkey/wasm-tools"
replace-with = "vendored-sources"
rev = "1b7763faa484e62752538b78e7a69883f4faceee"
[source."https://github.com/mozilla-spidermonkey/regalloc.rs"]
git = "https://github.com/mozilla-spidermonkey/regalloc.rs"
replace-with = "vendored-sources"
rev = "fc5d1d33317b0fbd36725757f80a95127eff5109"
[source."https://github.com/mozilla-spidermonkey/jsparagus"]
git = "https://github.com/mozilla-spidermonkey/jsparagus"
replace-with = "vendored-sources"
@ -122,6 +107,11 @@ git = "https://github.com/gfx-rs/d3d12-rs"
replace-with = "vendored-sources"
rev = "be19a243b86e0bafb9937d661fc8eabb3e42b44e"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[source."https://github.com/PLSysSec/rlbox_lucet_sandbox/"]
git = "https://github.com/PLSysSec/rlbox_lucet_sandbox/"
replace-with = "vendored-sources"

Cargo.lock (generated)
View file

@ -779,42 +779,42 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
]
[[package]]
name = "cranelift-codegen"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"byteorder",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
"log",
"regalloc",
"smallvec",
"target-lexicon 0.11.0",
"target-lexicon 0.12.0",
"thiserror",
]
[[package]]
name = "cranelift-codegen-meta"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[[package]]
name = "cranelift-entity"
@ -823,33 +823,33 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=cd07861d1c9
[[package]]
name = "cranelift-entity"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[[package]]
name = "cranelift-frontend"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen",
"log",
"smallvec",
"target-lexicon 0.11.0",
"target-lexicon 0.12.0",
]
[[package]]
name = "cranelift-wasm"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
"cranelift-frontend",
"itertools 0.9.0",
"itertools 0.10.0",
"log",
"smallvec",
"thiserror",
"wasmparser 0.67.0",
"wasmparser",
]
[[package]]
@ -2539,9 +2539,9 @@ dependencies = [
[[package]]
name = "itertools"
version = "0.9.0"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319"
dependencies = [
"either",
]
@ -2648,7 +2648,7 @@ version = "0.1.0"
dependencies = [
"jsrust_shared",
"mozglue-static",
"wasmparser 0.48.2",
"wasmparser",
"wat",
]
@ -4253,7 +4253,8 @@ dependencies = [
[[package]]
name = "regalloc"
version = "0.0.31"
source = "git+https://github.com/mozilla-spidermonkey/regalloc.rs?rev=fc5d1d33317b0fbd36725757f80a95127eff5109#fc5d1d33317b0fbd36725757f80a95127eff5109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "571f7f397d61c4755285cd37853fe8e03271c243424a907415909379659381c5"
dependencies = [
"log",
"rustc-hash",
@ -5078,9 +5079,9 @@ checksum = "6f4c118a7a38378f305a9e111fcb2f7f838c0be324bfb31a77ea04f7f6e684b4"
[[package]]
name = "target-lexicon"
version = "0.11.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe2635952a442a01fd4cb53d98858b5e4bb461b02c0d111f22f31772e3e7a8b2"
checksum = "64ae3b39281e4b14b8123bdbaddd472b7dfe215e444181f2f9d2443c2444f834"
[[package]]
name = "tempfile"
@ -5680,20 +5681,15 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasmparser"
version = "0.48.2"
version = "0.77.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
[[package]]
name = "wasmparser"
version = "0.67.0"
source = "git+https://github.com/mozilla-spidermonkey/wasm-tools?rev=1b7763faa484e62752538b78e7a69883f4faceee#1b7763faa484e62752538b78e7a69883f4faceee"
checksum = "b35c86d22e720a07d954ebbed772d01180501afe7d03d464f413bb5f8914a8d6"
[[package]]
name = "wast"
version = "35.0.1"
version = "35.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a5800e9f86a1eae935e38bea11e60fd253f6d514d153fb39b3e5535a7b37b56"
checksum = "2ef140f1b49946586078353a453a1d28ba90adfc54dde75710bc1931de204d68"
dependencies = [
"leb128",
]

View file

@ -86,12 +86,12 @@ minidump_writer_linux = { git = "https://github.com/msirringhaus/minidump_writer
xmldecl = { git = "https://github.com/hsivonen/xmldecl", rev="a74f7df5bf6cb11194fb74daa0c3cf42f326fd90" }
[patch.crates-io.cranelift-codegen]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
git = "https://github.com/bytecodealliance/wasmtime"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[patch.crates-io.cranelift-wasm]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
git = "https://github.com/bytecodealliance/wasmtime"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
# Patch autocfg to hide rustc output. Workaround for https://github.com/cuviper/autocfg/issues/30
[patch.crates-io.autocfg]

View file

@ -9,18 +9,18 @@
//
// is equivalent to
//
// (iMxN.mul (iMxN.widen_{high,low}_iKxL_{s,u} A)
// (iMxN.widen_{high,low}_iKxL_{s,u} B))
// (iMxN.mul (iMxN.extend_{high,low}_iKxL_{s,u} A)
// (iMxN.extend_{high,low}_iKxL_{s,u} B))
//
// It doesn't really matter what the inputs are, we can test this almost
// blindly.
//
// Unfortunately, we do not yet have i64x2.widen_* so we introduce a helper
// Unfortunately, we do not yet have i64x2.extend_* so we introduce a helper
// function to compute that.
function makeExtMulTest(wide, narrow, part, signed) {
let widener = (wide == 'i64x2') ?
`call $${wide}_widen_${part}_${narrow}_${signed}` :
`call $${wide}_extend_${part}_${narrow}_${signed}` :
`${wide}.extend_${part}_${narrow}_${signed}`;
return `
(func (export "${wide}_extmul_${part}_${narrow}_${signed}")
@ -36,21 +36,21 @@ function makeExtMulTest(wide, narrow, part, signed) {
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func $i64x2_widen_low_i32x4_s (param v128) (result v128)
(func $i64x2_extend_low_i32x4_s (param v128) (result v128)
(i64x2.shr_s (i8x16.shuffle 16 16 16 16 0 1 2 3 16 16 16 16 4 5 6 7
(local.get 0)
(v128.const i32x4 0 0 0 0))
(i32.const 32)))
(func $i64x2_widen_high_i32x4_s (param v128) (result v128)
(func $i64x2_extend_high_i32x4_s (param v128) (result v128)
(i64x2.shr_s (i8x16.shuffle 16 16 16 16 8 9 10 11 16 16 16 16 12 13 14 15
(local.get 0)
(v128.const i32x4 0 0 0 0))
(i32.const 32)))
(func $i64x2_widen_low_i32x4_u (param v128) (result v128)
(func $i64x2_extend_low_i32x4_u (param v128) (result v128)
(i8x16.shuffle 0 1 2 3 16 16 16 16 4 5 6 7 16 16 16 16
(local.get 0)
(v128.const i32x4 0 0 0 0)))
(func $i64x2_widen_high_i32x4_u (param v128) (result v128)
(func $i64x2_extend_high_i32x4_u (param v128) (result v128)
(i8x16.shuffle 8 9 10 11 16 16 16 16 12 13 14 15 16 16 16 16
(local.get 0)
(v128.const i32x4 0 0 0 0)))
@ -132,13 +132,13 @@ assertEq(ins.exports.const_bitmask_i64x2(), 1);
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func (export "widen_low_i32x4_s")
(func (export "extend_low_i32x4_s")
(v128.store (i32.const 0) (i64x2.extend_low_i32x4_s (v128.load (i32.const 16)))))
(func (export "widen_high_i32x4_s")
(func (export "extend_high_i32x4_s")
(v128.store (i32.const 0) (i64x2.extend_high_i32x4_s (v128.load (i32.const 16)))))
(func (export "widen_low_i32x4_u")
(func (export "extend_low_i32x4_u")
(v128.store (i32.const 0) (i64x2.extend_low_i32x4_u (v128.load (i32.const 16)))))
(func (export "widen_high_i32x4_u")
(func (export "extend_high_i32x4_u")
(v128.store (i32.const 0) (i64x2.extend_high_i32x4_u (v128.load (i32.const 16))))))`);
var mem32 = new Int32Array(ins.exports.mem.buffer);
@ -148,16 +148,16 @@ var mem64u = new BigUint64Array(ins.exports.mem.buffer);
var as = [205, 1, 192, 3].map((x) => x << 24);
set(mem32, 4, as);
ins.exports.widen_low_i32x4_s();
ins.exports.extend_low_i32x4_s();
assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n])))
ins.exports.widen_high_i32x4_s();
ins.exports.extend_high_i32x4_s();
assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n+2])));
ins.exports.widen_low_i32x4_u();
ins.exports.extend_low_i32x4_u();
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
ins.exports.widen_high_i32x4_u();
ins.exports.extend_high_i32x4_u();
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
// Saturating rounding q-format multiplication.
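The identity exercised by these tests, in scalar form: extended multiply widens each lane before multiplying, so the product of two signed 32-bit lanes is computed exactly in 64 bits. A minimal Rust model (function name hypothetical):

// Scalar model of one i64x2.extmul_*_i32x4_s lane: widen, then multiply.
fn extmul_lane_s(a: i32, b: i32) -> i64 {
    (a as i64) * (b as i64) // cannot overflow: |a|, |b| < 2^31
}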

View file

@ -21,4 +21,4 @@ jsrust_shared = { path = "./shared" }
# Workaround for https://github.com/rust-lang/rust/issues/58393
mozglue-static = { path = "../../../mozglue/static/rust" }
wat = { version = "1.0.37" }
wasmparser = { version = "0.48.2" }
wasmparser = { version = "0.77.0" }

View file

@ -56,20 +56,31 @@ pub unsafe extern "C" fn wasm_code_offsets(
}
let mut offsets = Vec::new();
let mut parser = Parser::new(bytes);
let mut next_input = ParserInput::Default;
while !parser.eof() {
let offset = parser.current_position();
match parser.read_with_input(next_input) {
ParserState::BeginSection { code, .. } if *code != SectionCode::Code => {
next_input = ParserInput::SkipSection;
// Read operators offsets and skip invalid data.
for payload in Parser::new(0).parse_all(bytes) {
if payload.is_err() {
break;
}
match payload.unwrap() {
Payload::CodeSectionEntry(body) => {
let reader = match body.get_operators_reader() {
Ok(r) => r,
Err(_) => {
break;
}
};
for pair in reader.into_iter_with_offsets() {
let offset = match pair {
Ok((_op, offset)) => offset,
Err(_) => {
break;
}
};
offsets.push(offset as u32);
}
}
ParserState::CodeOperator(..) => {
offsets.push(offset as u32);
next_input = ParserInput::Default
}
_ => next_input = ParserInput::Default,
_ => (),
}
}
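For reference, a minimal standalone sketch of the event-driven loop this rewrite adopts, assuming the wasmparser 0.77 API pinned above (`operator_offsets` is a hypothetical wrapper name):

use wasmparser::{Parser, Payload};

fn operator_offsets(bytes: &[u8]) -> Vec<u32> {
    let mut offsets = Vec::new();
    // Parser::new(0) starts at byte offset 0; parse_all yields Payload events.
    for payload in Parser::new(0).parse_all(bytes) {
        match payload {
            // One event per function body in the code section.
            Ok(Payload::CodeSectionEntry(body)) => {
                if let Ok(reader) = body.get_operators_reader() {
                    // Each item pairs a decoded operator with its byte offset.
                    for pair in reader.into_iter_with_offsets() {
                        match pair {
                            Ok((_op, offset)) => offsets.push(offset as u32),
                            Err(_) => return offsets, // skip invalid data
                        }
                    }
                }
            }
            Ok(_) => (),     // other payloads are ignored here
            Err(_) => break, // stop at the first parse error
        }
    }
    offsets
}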

View file

@ -13,8 +13,8 @@ name = "baldrdash"
# cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
# you should update the following $TOP_LEVEL/Cargo.toml file: look for the
# revision (rev) hashes of both cranelift dependencies (codegen and wasm).
cranelift-codegen = { version = "0.68.0", default-features = false }
cranelift-wasm = { version = "0.68.0" }
cranelift-codegen = { version = "0.73.0", default-features = false }
cranelift-wasm = { version = "0.73.0" }
log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
env_logger = "0.8"
smallvec = "1.0"
@ -24,7 +24,7 @@ bindgen = {version = "0.56", default-features = false} # disable `logging` to re
[features]
default = ['cranelift-codegen/std']
cranelift_x86 = ['cranelift-codegen/x64']
cranelift_x86 = ['cranelift-codegen/x86']
cranelift_arm32 = ['cranelift-codegen/arm32']
cranelift_arm64 = ['cranelift-codegen/arm64']

View file

@ -380,6 +380,9 @@ impl<'module> wasmparser::WasmModuleResources for ModuleEnvironment<'module> {
None
}
}
fn event_at(&self, _at: u32) -> Option<&Self::FuncType> {
panic!("unexpected exception operation");
}
fn global_at(&self, at: u32) -> Option<wasmparser::GlobalType> {
let num_globals = unsafe { low_level::env_num_globals(self.env) };
if (at as usize) < num_globals {

View file

@ -174,6 +174,7 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
deterministic_only: true,
memory64: false,
multi_memory: false,
exceptions: false,
};
let sig_index = self.module_env.func_sig_index(index);
let mut validator =
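wasmparser 0.77's WasmFeatures struct grew an `exceptions` flag, so validator construction must now state a position on the exception-handling proposal. A hedged sketch, assuming this revision of wasmparser implements Default for WasmFeatures (if not, every field must be spelled out, as the hunk above does):

use wasmparser::WasmFeatures;

// Baldrdash keeps exception handling off for now.
let features = WasmFeatures {
    deterministic_only: true,
    memory64: false,
    multi_memory: false,
    exceptions: false,
    ..Default::default()
};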

View file

@ -1390,7 +1390,8 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
Ok(ret.unwrap())
}
fn translate_loop_header(&mut self, mut pos: FuncCursor) -> WasmResult<()> {
fn translate_loop_header(&mut self, builder: &mut FunctionBuilder) -> WasmResult<()> {
let mut pos = builder.cursor();
let interrupt = self.load_interrupt_flag(&mut pos);
pos.ins()
.resumable_trapnz(interrupt, ir::TrapCode::Interrupt);

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"b0ed8fc54833fd48846644e3f59fbead46e7a2ff456194e03d04ce8b95404522","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"4204f6bd3dd43dc307a57dc1b3543fc3d31feb4c5c8e64035578a45d88c725b3","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"7ed6181651b03736af571db011b0c22da1accd1ed581c4637048baeb31ac4460","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"4204f6bd3dd43dc307a57dc1b3543fc3d31feb4c5c8e64035578a45d88c725b3","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}

View file

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.68.0"
version = "0.73.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"
[dependencies]
cranelift-entity = { path = "../entity", version = "0.68.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.73.0", default-features = false }
[badges]
maintenance = { status = "experimental" }

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"561ee9a55739ac9716bc2f024e2673d69aefa6edbc4ff8b61a221a1741ed862a","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"a0f5212fa593caf66371f5ee4b15e501939a9407c4663bff6b3ba356b11ca1b4","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"50620fb2a6271a7c9126dc30c433a1bf25646a4d84511f5745650aaaec700f42","src/cdsl/typevar.rs":"3cbe83a09d2402511b20415a8356f848fb82536926386bb42eaaa7740fb2457e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"1ff123ab481b48d82e13363043dfc98eaef837bbf6af485b8259c3863550e29c","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"e9f1645fec6e4b5cfba9b08cfff70f9d1a5ad3b392f5ee9f40cb1a8669a7c689","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"f2d3d1ece43c7f18bd7ef405715cd39f59433d8f33a7fa4d237c1de28528ff7c","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"745ef09f4927b5334d68155fa047910ef96311feef7ec20964bb033c3419cd3c","src/isa/x86/recipes.rs":"744292109344363b2210ac1b42cb4704b4b692aa8bf5583e4230557cf3749298","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"47a5e9fb3b7917cfe817d56dcc77c0470545e451e0f38a875af0531fbd9b6a58","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"14b668244b2afd71197c2dd8469af0e0602d590fcb14252c2b0b40cb9905a4ae","src/shared/immediates.rs":"563fa33accb992eb11a43f0f63259c62a2c44db59801431cc67ceec4b94f2ca3","src/shared/instructions.rs":"21d0f2b041a0bce64d3db614ca003ec9269ba0a31aa5dbdae34cb15e5a59d89f","src/shared/legalize.rs":"eb5f07fa107cadd67483881ccce29cc8fb9b698a0cd4f1d89853aac275cf7bcf","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"e7406ce17fb313fa05397dd8103f74eed67d35170d70b6e546e08954aef2ed87","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"122ed61f7a5d7859ae59f4969444d5b4b3cdf6c035e4aca8040aa93b393601e8","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"b01e6d51991c6bcc15b40c90d53a1bf9c7ecbc046f7fd1fea1798097db26ffb4","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"b2f2c80a8d24cc9523e3d83219fc3251f24898579a6831e45b7fe34ab74b6207","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"9e4f9aebbc81da3bef61ad4fa4f3be38f5494747b465d2cd95c269cdecb06e09","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"e61f37f6185082dcf41cde9e7edba16c5161dbe40cf40580cd7a6973ed8badbc","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"5bee86362ecb06bf974e1bf79f9fb3e50e81a829b311baf660f0a0c64ea65fdb","src/cdsl/type_inference.rs":"b0834b54176811f3c63a76ccb0114c05edd16173088501f794521ae7a1ac443d","src/cdsl/types.rs":"50620fb2a6271a7c9126dc30c433a1bf25646a4d84511f5745650aaaec700f42","src/cdsl/typevar.rs":"3cbe83a09d2402511b20415a8356f848fb82536926386bb42eaaa7740fb2457e","src/cdsl/xform.rs":"10760ea733d0462e2bd3ef636f657fa1817148761b1de6ffbfe0af3222a66438","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"3695066b8b58066a2f3959bb37a5464732a38dc10aebd65711ab6a1b26530a52","src/gen_inst.rs":"1f2eb68d2fca38b9e4b4f28125c5ea943efb2e1f1d927ada0d08a16937da1aba","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"a9001b09a60b28f63baeca6bac63781f48f4d68f65c8105ace1aedcd964b8468","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"a069c34d1fadc9b35aeafbf72cfd89140e5fa8b9136fc51118241591833a5cde","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"fba8fdd5d1ebef9cb34f0948f285cd3c63eed498e21bad100a69e316f961b737","src/isa/riscv/recipes.rs":"0f58141903aeb3a76a33b705e2dea3eb74864c42dd8b719d9e4f4f95ad0c5d80","src/isa/x86/encodings.rs":"83619a4b49da1eb7a946e2348f1c232cfc853c23387bab219e488b1118754085","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"186c688dd8ac773f2b2c4c1f1cbdb7a66ca13a8ed90c03f87dfe7fdaa12c15b3","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"c1a6e6657c4970f0ecb9287d835e9c0791908db1d10d0d35bb4899779a0d14f8","src/isa/x86/recipes.rs":"744292109344363b2210ac1b42cb4704b4b692aa8bf5583e4230557cf3749298","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"0c5bca85724e51d87ed431b2f783d9352d510ccc42b59170c808a5a041836649","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"14b668244b2afd71197c2dd8469af0e0602d590fcb14252c2b0b40cb9905a4ae","src/shared/immediates.rs":"42793948a4a84058059d39212236c10d46efa7f69b21d904735343525819209d","src/shared/instructions.rs":"b328e34c28d86046387372f84551c7ceeb230f6e82d044fba914bf80e2a183f5","src/shared/legalize.rs":"eb5f07fa107cadd67483881ccce29cc8fb9b698a0cd4f1d89853aac275cf7bcf","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"10602e7f4ac1fa307a178490c7e531760d001c1f3a416adacd4458e6ac34ee67","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}

View file

@ -1,19 +1,20 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.68.0"
version = "0.73.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
# FIXME(rust-lang/cargo#9300): uncomment once that lands
# [package.metadata.docs.rs]
# rustdoc-args = [ "--document-private-items" ]
[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.68.0" }
cranelift-entity = { path = "../../entity", version = "0.68.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.73.0" }
cranelift-entity = { path = "../../entity", version = "0.73.0" }
[badges]
maintenance = { status = "experimental" }

View file

@ -296,7 +296,8 @@ impl Var {
pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) {
assert!(
self.get_def(position).is_none(),
format!("redefinition of variable {}", self.name)
"redefinition of variable {}",
self.name
);
match position {
PatternPosition::Source => {
@ -461,7 +462,8 @@ impl Apply {
// Basic check on number of arguments.
assert!(
inst.operands_in.len() == args.len(),
format!("incorrect number of arguments in instruction {}", inst.name)
"incorrect number of arguments in instruction {}",
inst.name
);
// Check that the kinds of Literals arguments match the expected operand.
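This and the following hunks track a change in newer Rust: assert! and panic! no longer accept a pre-built format!() value as the message (non-literal panic messages became an error in the 2021 edition), so the format string and its arguments are passed directly instead. A minimal sketch (names hypothetical):

fn check_arity(ok: bool, inst_name: &str) {
    // Before: assert!(ok, format!("incorrect number of arguments in instruction {}", inst_name));
    // After: hand the format string and its arguments straight to assert!.
    assert!(ok, "incorrect number of arguments in instruction {}", inst_name);
}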

View file

@ -153,10 +153,9 @@ impl EncodingBuilder {
let inst = self.inst.inst();
assert!(
Rc::ptr_eq(&inst.format, &recipes[self.recipe].format),
format!(
"Inst {} and recipe {} must have the same format!",
inst.name, recipes[self.recipe].name
)
"Inst {} and recipe {} must have the same format!",
inst.name,
recipes[self.recipe].name
);
assert_eq!(

View file

@ -394,7 +394,7 @@ impl ValueTypeOrAny {
pub fn expect(self, msg: &str) -> ValueType {
match self {
ValueTypeOrAny::ValueType(vt) => vt,
ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)),
ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg),
}
}
}
@ -665,7 +665,7 @@ fn verify_polymorphic(
if operands_out.is_empty() {
// No result means no other possible type variable, so it's a type inference failure.
match maybe_error_message {
Some(msg) => panic!(msg),
Some(msg) => panic!("{}", msg),
None => panic!("typevar_operand must be a free type variable"),
}
}

View file

@ -260,10 +260,9 @@ impl EncodingRecipeBuilder {
if !self.format.has_value_list {
assert!(
operands_in.len() == self.format.num_value_operands,
format!(
"missing operand constraints for recipe {} (format {})",
self.name, self.format.name
)
"missing operand constraints for recipe {} (format {})",
self.name,
self.format.name
);
}

View file

@ -20,6 +20,7 @@ pub(crate) enum SpecificSetting {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Setting {
pub name: &'static str,
pub description: &'static str,
pub comment: &'static str,
pub specific: SpecificSetting,
pub byte_offset: u8,
@ -88,6 +89,7 @@ impl Into<PresetType> for PresetIndex {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Preset {
pub name: &'static str,
pub description: &'static str,
values: Vec<BoolSettingIndex>,
}
@ -169,6 +171,7 @@ pub(crate) enum ProtoSpecificSetting {
/// This is the information provided during building for a setting.
struct ProtoSetting {
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
}
@ -251,11 +254,13 @@ impl SettingGroupBuilder {
fn add_setting(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
) {
self.settings.push(ProtoSetting {
name,
description,
comment,
specific,
})
@ -264,6 +269,7 @@ impl SettingGroupBuilder {
pub fn add_bool(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: bool,
) -> BoolSettingIndex {
@ -271,28 +277,55 @@ impl SettingGroupBuilder {
self.predicates.is_empty(),
"predicates must be added after the boolean settings"
);
self.add_setting(name, comment, ProtoSpecificSetting::Bool(default));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Bool(default),
);
BoolSettingIndex(self.settings.len() - 1)
}
pub fn add_enum(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
values: Vec<&'static str>,
) {
self.add_setting(name, comment, ProtoSpecificSetting::Enum(values));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Enum(values),
);
}
pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) {
self.add_setting(name, comment, ProtoSpecificSetting::Num(default));
pub fn add_num(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: u8,
) {
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Num(default),
);
}
pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) {
self.predicates.push(ProtoPredicate { name, node });
}
pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex {
pub fn add_preset(
&mut self,
name: &'static str,
description: &'static str,
args: Vec<PresetType>,
) -> PresetIndex {
let mut values = Vec::new();
for arg in args {
match arg {
@ -302,7 +335,11 @@ impl SettingGroupBuilder {
PresetType::BoolSetting(index) => values.push(index),
}
}
self.presets.push(Preset { name, values });
self.presets.push(Preset {
name,
description,
values,
});
PresetIndex(self.presets.len() - 1)
}
@ -347,6 +384,7 @@ impl SettingGroupBuilder {
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset,
specific,
@ -367,6 +405,7 @@ impl SettingGroupBuilder {
};
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset: byte_offset + predicate_number / 8,
specific: SpecificSetting::Bool(BoolSetting {
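With this change every setting and preset carries a short description alongside the long-form comment, so all builder call sites gain one argument. A sketch of the updated call shape, mirroring the x86 call sites later in this patch:

let mut settings = SettingGroupBuilder::new("x86");
// add_bool now takes (name, description, comment, default).
let has_sse3 = settings.add_bool(
    "has_sse3",
    "Has support for SSE3.",           // new one-line description
    "SSE3: CPUID.01H:ECX.SSE3[bit 0]", // pre-existing long comment
    false,
);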

View file

@ -210,7 +210,8 @@ impl TypeEnvironment {
None => {
assert!(
!actual_tv.name.starts_with("typeof_"),
format!("variable {} should be explicitly ranked", actual_tv.name)
"variable {} should be explicitly ranked",
actual_tv.name
);
None
}

View file

@ -74,19 +74,18 @@ impl Transform {
for &var_index in &input_vars {
assert!(
var_pool.get(var_index).is_input(),
format!("'{:?}' used as both input and def", var_pool.get(var_index))
"'{:?}' used as both input and def",
var_pool.get(var_index)
);
}
assert!(
input_vars.len() == num_src_inputs,
format!(
"extra input vars in dst pattern: {:?}",
input_vars
.iter()
.map(|&i| var_pool.get(i))
.skip(num_src_inputs)
.collect::<Vec<_>>()
)
"extra input vars in dst pattern: {:?}",
input_vars
.iter()
.map(|&i| var_pool.get(i))
.skip(num_src_inputs)
.collect::<Vec<_>>()
);
// Perform type inference and cleanup.
@ -143,7 +142,8 @@ impl Transform {
let defined_var = self.var_pool.get(var_index);
assert!(
defined_var.is_output(),
format!("{:?} not defined in the destination pattern", defined_var)
"{:?} not defined in the destination pattern",
defined_var
);
}
}
@ -226,7 +226,8 @@ fn rewrite_expr(
let var = var_pool.get(own_var);
assert!(
var.is_input() || var.get_def(position).is_some(),
format!("{:?} used as both input and def", var)
"{:?} used as both input and def",
var
);
args.push(Expr::Var(own_var));
}
@ -400,10 +401,8 @@ impl TransformGroupBuilder {
self.custom_legalizes
.insert(inst.camel_name.clone(), func_name)
.is_none(),
format!(
"custom legalization action for {} inserted twice",
inst.name
)
"custom legalization action for {} inserted twice",
inst.name
);
}
@ -442,7 +441,8 @@ impl TransformGroups {
for group in self.groups.values() {
assert!(
group.name != new_group.name,
format!("trying to insert {} for the second time", new_group.name)
"trying to insert {} for the second time",
new_group.name
);
}
self.groups.push(new_group)
@ -459,7 +459,7 @@ impl TransformGroups {
return group;
}
}
panic!(format!("transform group with name {} not found", name));
panic!("transform group with name {} not found", name);
}
}

View file

@ -99,8 +99,8 @@ fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter)
Some(previous_format_name) => {
assert!(
previous_format_name == leaf_format_name,
format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name
));
"Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name
);
}
}
}

View file

@ -68,6 +68,7 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
/// `ValueList` to store the additional information out of line.
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
fmt.line("#[derive(Clone, Debug)]");
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
fmt.line("#[allow(missing_docs)]");
fmt.line("pub enum InstructionData {");
fmt.indent(|fmt| {
@ -410,7 +411,10 @@ fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
fmt.line("#[repr(u16)]");
fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]");
fmt.line(
r#"#[cfg_attr(feature = "enable-peepmatic", derive(serde::Serialize, serde::Deserialize))]"#
r#"#[cfg_attr(
any(feature = "enable-peepmatic", feature = "enable-serde"),
derive(serde::Serialize, serde::Deserialize)
)]"#,
);
// We explicitly set the discriminant of the first variant to 1, which allows us to take

View file

@ -70,6 +70,33 @@ fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatte
fmtln!(fmt, "}");
}
/// Generates the `iter` function.
fn gen_iterator(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "impl Flags {");
fmt.indent(|fmt| {
fmt.doc_comment("Iterates the setting values.");
fmtln!(fmt, "pub fn iter(&self) -> impl Iterator<Item = Value> {");
fmt.indent(|fmt| {
fmtln!(fmt, "let mut bytes = [0; {}];", group.settings_size);
fmtln!(fmt, "bytes.copy_from_slice(&self.bytes[0..{}]);", group.settings_size);
fmtln!(fmt, "DESCRIPTORS.iter().filter_map(move |d| {");
fmt.indent(|fmt| {
fmtln!(fmt, "let values = match &d.detail {");
fmt.indent(|fmt| {
fmtln!(fmt, "detail::Detail::Preset => return None,");
fmtln!(fmt, "detail::Detail::Enum { last, enumerators } => Some(TEMPLATE.enums(*last, *enumerators)),");
fmtln!(fmt, "_ => None");
});
fmtln!(fmt, "};");
fmtln!(fmt, "Some(Value{ name: d.name, detail: d.detail, values, value: bytes[d.offset as usize] })");
});
fmtln!(fmt, "})");
});
fmtln!(fmt, "}");
});
fmtln!(fmt, "}");
}
/// Emit Display and FromStr implementations for enum settings.
fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
fmtln!(fmt, "impl fmt::Display for {} {{", name);
@ -136,7 +163,7 @@ fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) {
/// Emit a getter function for `setting`.
fn gen_getter(setting: &Setting, fmt: &mut Formatter) {
fmt.doc_comment(setting.comment);
fmt.doc_comment(format!("{}\n{}", setting.description, setting.comment));
match setting.specific {
SpecificSetting::Bool(BoolSetting {
predicate_number, ..
@ -254,6 +281,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", setting.name);
fmtln!(fmt, "description: \"{}\",", setting.description);
fmtln!(fmt, "offset: {},", setting.byte_offset);
match setting.specific {
SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
@ -286,6 +314,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", preset.name);
fmtln!(fmt, "description: \"{}\",", preset.description);
fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
fmtln!(fmt, "detail: detail::Detail::Preset,");
});
@ -418,7 +447,7 @@ fn gen_display(group: &SettingGroup, fmt: &mut Formatter) {
fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
// Generate struct.
fmtln!(fmt, "#[derive(Clone)]");
fmtln!(fmt, "#[derive(Clone, Hash)]");
fmt.doc_comment(format!("Flags group `{}`.", group.name));
fmtln!(fmt, "pub struct Flags {");
fmt.indent(|fmt| {
@ -427,6 +456,7 @@ fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
fmtln!(fmt, "}");
gen_constructor(group, parent, fmt);
gen_iterator(group, fmt);
gen_enum_types(group, fmt);
gen_getters(group, fmt);
gen_descriptors(group, fmt);

View file

@ -8,7 +8,10 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
use crate::shared::Definitions as SharedDefinitions;
fn define_settings(_shared: &SettingGroup) -> SettingGroup {
let setting = SettingGroupBuilder::new("arm64");
let mut setting = SettingGroupBuilder::new("arm64");
let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false);
setting.add_predicate("use_lse", predicate!(has_lse));
setting.build()
}

View file

@ -17,33 +17,39 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
let supports_m = setting.add_bool(
"supports_m",
"CPU supports the 'M' extension (mul/div)",
"",
false,
);
let supports_a = setting.add_bool(
"supports_a",
"CPU supports the 'A' extension (atomics)",
"",
false,
);
let supports_f = setting.add_bool(
"supports_f",
"CPU supports the 'F' extension (float)",
"",
false,
);
let supports_d = setting.add_bool(
"supports_d",
"CPU supports the 'D' extension (double)",
"",
false,
);
let enable_m = setting.add_bool(
"enable_m",
"Enable the use of 'M' instructions if available",
"",
true,
);
setting.add_bool(
"enable_e",
"Enable the 'RV32E' instruction set with only 16 registers",
"",
false,
);

View file

@ -25,7 +25,8 @@ impl RecipeGroup {
fn push(&mut self, builder: EncodingRecipeBuilder) {
assert!(
self.name_to_recipe.get(&builder.name).is_none(),
format!("riscv recipe '{}' created twice", builder.name)
"riscv recipe '{}' created twice",
builder.name
);
let name = builder.name.clone();
let number = self.recipes.push(builder.build());

View file

@ -45,10 +45,8 @@ impl PerCpuModeEncodings {
if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
assert!(
self.recipes[*found_index] == recipe,
format!(
"trying to insert different recipes with a same name ({})",
recipe.name
)
"trying to insert different recipes with a same name ({})",
recipe.name
);
*found_index
} else {
@ -549,10 +547,13 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
}
e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());
// You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be
// on 8-bit operands (f.ex "xor %al, %al"). Cranelift currently does not know when it can
// safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode.
let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
e.enc_both_instp(
iconst.bind(I8),
rec_u_id_z.opcodes(&XORB),
rec_u_id_z.opcodes(&XOR),
is_zero_int.clone(),
);
@ -1688,6 +1689,7 @@ fn define_simd(
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s");
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
let x86_insertps = x86.by_name("x86_insertps");
let x86_fmax = x86.by_name("x86_fmax");
@ -2210,6 +2212,9 @@ fn define_simd(
// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
// SIMD multiplication and add adjacent pairs, from SSE2.
e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD));
// SIMD integer multiplication for I64x2 using a AVX512.
{
e.enc_32_64_maybe_isap(

View file

@ -396,7 +396,6 @@ fn define_simd(
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
let ishl_imm = insts.by_name("ishl_imm");
let load_splat = insts.by_name("load_splat");
let raw_bitcast = insts.by_name("raw_bitcast");
let scalar_to_vector = insts.by_name("scalar_to_vector");
let splat = insts.by_name("splat");
@ -821,7 +820,6 @@ fn define_simd(
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
narrow.custom_legalize(fmin, "expand_minmax_vector");
narrow.custom_legalize(fmax, "expand_minmax_vector");
narrow.custom_legalize(load_splat, "expand_load_splat");
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");

View file

@ -508,6 +508,9 @@ pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
/// Multiply the packed word integers, add adjacent doubleword results.
pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];
@ -711,9 +714,6 @@ pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// r/m{16,32,64} XOR register of the same size.
pub static XOR: [u8; 1] = [0x31];
/// r/m8 XOR r8.
pub static XORB: [u8; 1] = [0x30];
/// Bitwise logical XOR of packed double-precision floating-point values.
pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];

View file

@ -4,37 +4,77 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false);
let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false);
let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false);
let has_sse3 = settings.add_bool(
"has_sse3",
"Has support for SSE3.",
"SSE3: CPUID.01H:ECX.SSE3[bit 0]",
false,
);
let has_ssse3 = settings.add_bool(
"has_ssse3",
"Has support for SSSE3.",
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
false,
);
let has_sse41 = settings.add_bool(
"has_sse41",
"Has support for SSE4.1.",
"SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]",
false,
);
let has_sse42 = settings.add_bool(
"has_sse42",
"Has support for SSE4.2.",
"SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]",
false,
);
let has_avx = settings.add_bool(
"has_avx",
"Has support for AVX.",
"AVX: CPUID.01H:ECX.AVX[bit 28]",
false,
);
let has_avx2 = settings.add_bool(
"has_avx2",
"Has support for AVX2.",
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
false,
);
let has_avx512dq = settings.add_bool(
"has_avx512dq",
"Has support for AVX512DQ.",
"AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]",
false,
);
let has_avx512vl = settings.add_bool(
"has_avx512vl",
"Has support for AVX512VL.",
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
false,
);
let has_avx512f = settings.add_bool(
"has_avx512f",
"Has support for AVX512F.",
"AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
false,
);
let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
let has_popcnt = settings.add_bool(
"has_popcnt",
"Has support for POPCNT.",
"POPCNT: CPUID.01H:ECX.POPCNT[bit 23]",
false,
);
// CPUID.(EAX=07H, ECX=0H):EBX
let has_bmi1 = settings.add_bool(
"has_bmi1",
"Has support for BMI1.",
"BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
false,
);
let has_bmi2 = settings.add_bool(
"has_bmi2",
"Has support for BMI2.",
"BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
false,
);
@ -42,6 +82,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// CPUID.EAX=80000001H:ECX
let has_lzcnt = settings.add_bool(
"has_lzcnt",
"Has support for LZCNT.",
"LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
false,
);
@ -85,7 +126,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
// Some shared boolean values are used in x86 instruction predicates, so we need to group them
// in the same TargetIsa, for compabitibity with code generated by meta-python.
// in the same TargetIsa, for compatibility with code generated by meta-python.
// TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
// back in the shared SettingGroup, and use it in x86 instruction predicates.
@ -104,21 +145,40 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// Presets corresponding to x86 CPUs.
settings.add_preset("baseline", preset!());
settings.add_preset(
"baseline",
"A baseline preset with no extensions enabled.",
preset!(),
);
let nehalem = settings.add_preset(
"nehalem",
"Nehalem microarchitecture.",
preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
);
let haswell = settings.add_preset(
"haswell",
"Haswell microarchitecture.",
preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
);
let broadwell = settings.add_preset("broadwell", preset!(haswell));
let skylake = settings.add_preset("skylake", preset!(broadwell));
let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
settings.add_preset("icelake", preset!(cannonlake));
let broadwell = settings.add_preset(
"broadwell",
"Broadwell microarchitecture.",
preset!(haswell),
);
let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell));
let cannonlake = settings.add_preset(
"cannonlake",
"Canon Lake microarchitecture.",
preset!(skylake),
);
settings.add_preset(
"icelake",
"Ice Lake microarchitecture.",
preset!(cannonlake),
);
settings.add_preset(
"znver1",
"Zen (first generation) microarchitecture.",
preset!(
has_sse3
&& has_ssse3

View file

@ -164,9 +164,14 @@ impl Immediates {
atomic_rmw_op_values.insert("add", "Add");
atomic_rmw_op_values.insert("sub", "Sub");
atomic_rmw_op_values.insert("and", "And");
atomic_rmw_op_values.insert("nand", "Nand");
atomic_rmw_op_values.insert("or", "Or");
atomic_rmw_op_values.insert("xor", "Xor");
atomic_rmw_op_values.insert("xchg", "Xchg");
atomic_rmw_op_values.insert("umin", "Umin");
atomic_rmw_op_values.insert("umax", "Umax");
atomic_rmw_op_values.insert("smin", "Smin");
atomic_rmw_op_values.insert("smax", "Smax");
new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values)
.with_doc("Atomic Read-Modify-Write Ops")
},
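Cranelift 0.73 widens ir::AtomicRmwOp with the variants added above; their per-location semantics in scalar Rust form:

// Scalar semantics of the added read-modify-write operations.
fn rmw_nand(a: u32, b: u32) -> u32 { !(a & b) }
fn rmw_umin(a: u32, b: u32) -> u32 { a.min(b) } // unsigned minimum
fn rmw_umax(a: u32, b: u32) -> u32 { a.max(b) } // unsigned maximum
fn rmw_smin(a: i32, b: i32) -> i32 { a.min(b) } // signed minimum
fn rmw_smax(a: i32, b: i32) -> i32 { a.max(b) } // signed maximum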

View file

@ -3582,7 +3582,7 @@ pub(crate) fn define(
"fmin_pseudo",
r#"
Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@ -3614,7 +3614,7 @@ pub(crate) fn define(
"fmax_pseudo",
r#"
Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@ -4102,7 +4102,7 @@ pub(crate) fn define(
This will double the lane width and halve the number of lanes. So the resulting
vector has the same number of bits as `x` and `y` do (individually).
See https://github.com/WebAssembly/simd/pull/127 for background info.
See <https://github.com/WebAssembly/simd/pull/127> for background info.
"#,
&formats.binary,
)
@ -4325,6 +4325,26 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"fcvt_low_from_sint",
r#"
Converts packed signed doubleword integers to packed double precision floating point.
Considering only the low half of the register, each lane in `x` is interpreted as a
signed doubleword integer that is then converted to a double precision float. This
instruction differs from fcvt_from_sint in that it converts half the number of lanes
which are converted to occupy twice the number of bits. No rounding should be needed
for the resulting float.
The result type will have half the number of vector lanes as the input.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let WideInt = &TypeVar::new(
"WideInt",
"An integer type with lanes from `i16` upwards",
@ -4491,24 +4511,5 @@ pub(crate) fn define(
.other_side_effects(true),
);
let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
let a = &Operand::new("a", TxN);
ig.push(
Inst::new(
"load_splat",
r#"
Load an element from memory at ``p + Offset`` and return a vector
whose lanes are all set to that element.
This is equivalent to ``load`` followed by ``splat``.
"#,
&formats.load,
)
.operands_in(vec![MemFlags, p, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.build()
}
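A scalar analogue of the new fcvt_low_from_sint documented above: each low-half signed doubleword lane widens to a double, and since every i32 is exactly representable as an f64 (53-bit mantissa), no rounding occurs.

// Per-lane model of fcvt_low_from_sint (low i32x4 half -> f64x2).
fn fcvt_low_lane(x: i32) -> f64 {
    x as f64 // exact conversion
}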

View file

@ -5,29 +5,29 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"regalloc",
r#"Register allocator to use with the MachInst backend.
"Register allocator to use with the MachInst backend.",
r#"
This selects the register allocator as an option among those offered by the `regalloc.rs`
crate. Please report register allocation bugs to the maintainers of this crate whenever
possible.
This selects the register allocator as an option among those offered by the `regalloc.rs`
crate. Please report register allocation bugs to the maintainers of this crate whenever
possible.
Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this
means the x86_64 backend doesn't use this yet.
Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this
means the x86_64 backend doesn't use this yet.
Possible values:
Possible values:
- `backtracking` is a greedy, backtracking register allocator as implemented in
Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but
it should generate better code in general, resulting in better throughput of generated
code.
- `backtracking_checked` is the backtracking allocator with additional self checks that may
take some time to run, and thus these checks are disabled by default.
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
- `backtracking` is a greedy, backtracking register allocator as implemented in
Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but
it should generate better code in general, resulting in better throughput of generated
code.
- `backtracking_checked` is the backtracking allocator with additional self checks that may
take some time to run, and thus these checks are disabled by default.
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
vec![
"backtracking",
"backtracking_checked",
@ -38,24 +38,23 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"opt_level",
"Optimization level for generated code.",
r#"
Optimization level:
Supported levels:
- none: Minimise compile time by disabling most optimizations.
- speed: Generate the fastest possible code
- speed_and_size: like "speed", but also perform transformations
aimed at reducing code size.
- `none`: Minimise compile time by disabling most optimizations.
- `speed`: Generate the fastest possible code
- `speed_and_size`: like "speed", but also perform transformations aimed at reducing code size.
"#,
vec!["none", "speed", "speed_and_size"],
);
settings.add_bool(
"enable_verifier",
"Run the Cranelift IR verifier at strategic times during compilation.",
r#"
Run the Cranelift IR verifier at strategic times during compilation.
This makes compilation slower but catches many bugs. The verifier is always enabled by
default, which is useful during development.
This makes compilation slower but catches many bugs. The verifier is always enabled by
default, which is useful during development.
"#,
true,
);
@ -65,110 +64,110 @@ pub(crate) fn define() -> SettingGroup {
// `colocated` flag on external functions and global values.
settings.add_bool(
"is_pic",
"Enable Position-Independent Code generation",
"Enable Position-Independent Code generation.",
"",
false,
);
settings.add_bool(
"use_colocated_libcalls",
"Use colocated libcalls.",
r#"
Use colocated libcalls.
Generate code that assumes that libcalls can be declared "colocated",
meaning they will be defined along with the current function, such that
they can use more efficient addressing.
"#,
"#,
false,
);
settings.add_bool(
"avoid_div_traps",
"Generate explicit checks around native division instructions to avoid their trapping.",
r#"
Generate explicit checks around native division instructions to avoid
their trapping.
This is primarily used by SpiderMonkey which doesn't install a signal
handler for SIGFPE, but expects a SIGILL trap for division by zero.
On ISAs like ARM where the native division instructions don't trap,
this setting has no effect - explicit checks are always inserted.
"#,
"#,
false,
);
settings.add_bool(
"enable_float",
"Enable the use of floating-point instructions.",
r#"
Enable the use of floating-point instructions
Disabling use of floating-point instructions is not yet implemented.
"#,
"#,
true,
);
settings.add_bool(
"enable_nan_canonicalization",
"Enable NaN canonicalization.",
r#"
Enable NaN canonicalization
This replaces NaNs with a single canonical value, for users requiring
entirely deterministic WebAssembly computation. This is not required
by the WebAssembly spec, so it is not enabled by default.
"#,
"#,
false,
);
settings.add_bool(
"enable_pinned_reg",
r#"Enable the use of the pinned register.
This register is excluded from register allocation, and is completely under the control of
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
with the set_pinned_reg instruction.
"Enable the use of the pinned register.",
r#"
This register is excluded from register allocation, and is completely under the control of
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
with the set_pinned_reg instruction.
"#,
false,
);
settings.add_bool(
"use_pinned_reg_as_heap_base",
r#"Use the pinned register as the heap base.
"Use the pinned register as the heap base.",
r#"
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
base, instead of fetching it from a global value.
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
base, instead of fetching it from a global value.
Warning! Enabling this means that the pinned register *must* be maintained to contain the
heap base address at all times, during the lifetime of a function. Using the pinned
register for other purposes when this is set is very likely to cause crashes.
Warning! Enabling this means that the pinned register *must* be maintained to contain the
heap base address at all times, during the lifetime of a function. Using the pinned
register for other purposes when this is set is very likely to cause crashes.
"#,
false,
);
settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false);
settings.add_bool(
"enable_simd",
"Enable the use of SIMD instructions.",
"",
false,
);
settings.add_bool(
"enable_atomics",
"Enable the use of atomic instructions",
"",
true,
);
settings.add_bool(
"enable_safepoints",
"Enable safepoint instruction insertions.",
r#"
Enable safepoint instruction insertions.
This will allow the emit_stack_maps() function to insert the safepoint
instruction on top of calls and interrupt traps in order to display the
live reference values at that point in the program.
"#,
"#,
false,
);
settings.add_enum(
"tls_model",
r#"
Defines the model used to perform TLS accesses.
"#,
"Defines the model used to perform TLS accesses.",
"",
vec!["none", "elf_gd", "macho", "coff"],
);
@ -176,9 +175,9 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"libcall_call_conv",
"Defines the calling convention to use for LibCalls call expansion.",
r#"
Defines the calling convention to use for LibCalls call expansion,
since it may be different from the ISA default calling convention.
This may be different from the ISA default calling convention.
The default value is to use the same calling convention as the ISA
default calling convention.
@ -192,6 +191,7 @@ pub(crate) fn define() -> SettingGroup {
"cold",
"system_v",
"windows_fastcall",
"apple_aarch64",
"baldrdash_system_v",
"baldrdash_windows",
"baldrdash_2020",
@ -201,9 +201,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_num(
"baldrdash_prologue_words",
"Number of pointer-sized words pushed by the baldrdash prologue.",
r#"
Number of pointer-sized words pushed by the baldrdash prologue.
Functions with the `baldrdash` calling convention don't generate their
own prologue and epilogue. They depend on externally generated code
that pushes a fixed number of words in the prologue and restores them
@ -212,15 +211,46 @@ pub(crate) fn define() -> SettingGroup {
This setting configures the number of pointer-sized words pushed on the
stack when the Cranelift-generated code is entered. This includes the
pushed return address on x86.
"#,
"#,
0,
);
settings.add_bool(
"enable_llvm_abi_extensions",
"Enable various ABI extensions defined by LLVM's behavior.",
r#"
In some cases, LLVM's implementation of an ABI (calling convention)
goes beyond a standard and supports additional argument types or
behavior. This option instructs Cranelift codegen to follow LLVM's
behavior where applicable.
Currently, this applies only to Windows Fastcall on x86-64, and
allows an `i128` argument to be spread across two 64-bit integer
registers. The Fastcall implementation otherwise does not support
`i128` arguments, and will panic if they are present and this
option is not set.
"#,
false,
);
settings.add_bool(
"unwind_info",
"Generate unwind information.",
r#"
This increases metadata size and compile time, but allows for the
debugger to trace frames, is needed for GC tracing that relies on
libunwind (such as in Wasmtime), and is unconditionally needed on
certain platforms (such as Windows) that must always be able to unwind.
"#,
true,
);
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
// as all-ones bitpatterns.
settings.add_bool(
"emit_all_ones_funcaddrs",
"Emit not-yet-relocated function addresses as all-ones bit patterns.",
"",
false,
);
@ -228,32 +258,27 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_probestack",
r#"
Enable the use of stack probes, for calling conventions which support this
functionality.
"#,
"Enable the use of stack probes for supported calling conventions.",
"",
true,
);
settings.add_bool(
"probestack_func_adjusts_sp",
r#"
Set this to true of the stack probe function modifies the stack pointer
itself.
"#,
"Enable if the stack probe adjusts the stack pointer.",
"",
false,
);
settings.add_num(
"probestack_size_log2",
"The log2 of the size of the stack guard region.",
r#"
The log2 of the size of the stack guard region.
Stack frames larger than this size will have stack overflow checked
by calling the probestack function.
The default is 12, which translates to a size of 4096.
"#,
"#,
12,
);
@ -262,6 +287,7 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_jump_tables",
"Enable the use of jump tables in generated machine code.",
"",
true,
);
@ -269,16 +295,15 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_heap_access_spectre_mitigation",
"Enable Spectre mitigation on heap bounds checks.",
r#"
Enable Spectre mitigation on heap bounds checks.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.
This option is enabled by default because it is highly
recommended for secure sandboxing. The embedder should consider
the security implications carefully before disabling this option.
This option is enabled by default because it is highly
recommended for secure sandboxing. The embedder should consider
the security implications carefully before disabling this option.
"#,
true,
);
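
For orientation, a minimal sketch of how an embedder might toggle the settings defined above through cranelift-codegen's public `settings` builder. The setting names come from this file; the builder API (settings::builder(), Configurable::set/enable, Flags::new) is assumed from the crate's documented interface and is not part of this diff.

use cranelift_codegen::settings::{self, Configurable};

fn build_flags() -> settings::Flags {
    let mut b = settings::builder();
    // Enum-valued settings take a value by name, as defined above.
    b.set("opt_level", "speed").unwrap();
    // Boolean settings can be flipped with enable() or set(.., "true").
    b.enable("enable_verifier").unwrap();
    b.set("unwind_info", "true").unwrap();
    settings::Flags::new(b)
}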


@ -1 +1 @@
{"files":{"Cargo.toml":"322ab8efd1588c57313b18aaa231ee30a888741828cf27283e6c62735d73d02d","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"7a8eda4dafcf47100c41e61b5c985f089d1985c500624956dc183fcf6bc7b183"},"package":null}
{"files":{"Cargo.toml":"940852948d4feaabc1a1b694b6901509099d5f464f623928edd24b9cacd0c8dd","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"d9f657a24170255c8136c2b07a2a982f9a4e02f23d425cb07fdf95b76c15825d","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"7a8eda4dafcf47100c41e61b5c985f089d1985c500624956dc183fcf6bc7b183"},"package":null}


@ -1,11 +1,17 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.68.0"
version = "0.73.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
# Since this is a shared dependency of several packages, please strive to keep this dependency-free.
[dependencies]
# Since this is a shared dependency of several packages, please strive to keep this dependency-free
# when no features are enabled.
serde = { version = "1.0.94", features = ["derive"], optional = true }
[features]
enable-serde = ["serde"]


@ -7,6 +7,9 @@
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Common traits of condition codes.
pub trait CondCode: Copy {
/// Get the inverse condition code of `self`.
@ -30,6 +33,7 @@ pub trait CondCode: Copy {
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum IntCC {
/// `==`.
Equal,
@ -187,6 +191,7 @@ impl FromStr for IntCC {
/// comparison. The 14 condition codes here cover every possible combination of the relation above
/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum FloatCC {
/// EQ | LT | GT
Ordered,

(File diff hidden: one or more lines are too long to display.)

third_party/rust/cranelift-codegen/Cargo.toml (vendored)

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.68.0"
version = "0.73.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@ -13,31 +13,31 @@ build = "build.rs"
edition = "2018"
[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.68.0" }
cranelift-entity = { path = "../entity", version = "0.68.0" }
cranelift-bforest = { path = "../bforest", version = "0.68.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.73.0" }
cranelift-entity = { path = "../entity", version = "0.73.0" }
cranelift-bforest = { path = "../bforest", version = "0.73.0" }
hashbrown = { version = "0.9.1", optional = true }
target-lexicon = "0.11"
target-lexicon = "0.12"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
bincode = { version = "1.2.1", optional = true }
gimli = { version = "0.23.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.0.0" }
smallvec = { version = "1.6.1" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
peepmatic = { path = "../peepmatic", optional = true, version = "0.68.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.68.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.68.0" }
regalloc = { git = "https://github.com/mozilla-spidermonkey/regalloc.rs", rev = "fc5d1d33317b0fbd36725757f80a95127eff5109" }
souper-ir = { version = "1", optional = true }
wast = { version = "27.0.0", optional = true }
peepmatic = { path = "../peepmatic", optional = true, version = "0.73.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.73.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.73.0" }
regalloc = { version = "0.0.31" }
souper-ir = { version = "2.1.0", optional = true }
wast = { version = "35.0.0", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accommodated in `tests`.
[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.68.0" }
cranelift-codegen-meta = { path = "meta", version = "0.73.0" }
[features]
default = ["std", "unwind"]
@ -63,9 +63,15 @@ unwind = ["gimli"]
x86 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
arm32 = [] # Work-in-progress codegen backend for ARM.
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []
# Make the old x86 backend the default.
old-x86-backend = []
# Option to enable all architectures.
all-arch = [
"x86",
@ -74,7 +80,12 @@ all-arch = [
]
# For dependent crates that want to serialize some parts of cranelift
enable-serde = ["serde"]
enable-serde = [
"serde",
"regalloc/enable-serde",
"cranelift-entity/enable-serde",
"cranelift-codegen-shared/enable-serde"
]
# Allow snapshotting regalloc test cases. Useful only to report bad register
# allocation failures, or for regalloc.rs developers.

third_party/rust/cranelift-codegen/build.rs (vendored)

@ -17,6 +17,7 @@
use cranelift_codegen_meta as meta;
use std::env;
use std::io::Read;
use std::process;
use std::time::Instant;
@ -97,4 +98,40 @@ fn main() {
)
.unwrap()
}
let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
let mut cmd = std::process::Command::new("git");
cmd.arg("rev-parse")
.arg("HEAD")
.stdout(std::process::Stdio::piped())
.current_dir(env::var("CARGO_MANIFEST_DIR").unwrap());
let version = if let Ok(mut child) = cmd.spawn() {
let mut git_rev = String::new();
child
.stdout
.as_mut()
.unwrap()
.read_to_string(&mut git_rev)
.unwrap();
let status = child.wait().unwrap();
if status.success() {
let git_rev = git_rev.trim().chars().take(9).collect::<String>();
format!("{}-{}", pkg_version, git_rev)
} else {
// not a git repo
pkg_version
}
} else {
// git not available
pkg_version
};
std::fs::write(
std::path::Path::new(&out_dir).join("version.rs"),
format!(
"/// Version number of this crate. \n\
pub const VERSION: &str = \"{}\";",
version
),
)
.unwrap();
}
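
A hedged sketch of the consumer side: the generated version.rs is presumably pulled into the crate with the usual OUT_DIR include pattern (the exact include site in lib.rs is not shown in this diff), producing a constant such as:

// In lib.rs (assumed include pattern, not part of this diff):
include!(concat!(env!("OUT_DIR"), "/version.rs"));

// which expands, for a checkout at the wasmtime rev pinned in this commit,
// to roughly:
/// Version number of this crate.
pub const VERSION: &str = "0.73.0-6b77786a6";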


@ -22,7 +22,7 @@ use crate::legalize_function;
use crate::legalizer::simple_legalize;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::machinst::MachCompileResult;
use crate::machinst::{MachCompileResult, MachStackMap};
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
@ -239,10 +239,23 @@ impl Context {
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps);
if let Some(ref result) = &self.mach_compile_result {
result.buffer.emit(&mut sink);
let info = sink.info;
// New backends do not emit StackMaps through the `CodeSink` because its interface
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
// `result.buffer.stack_maps()`.
for &MachStackMap {
offset_end,
ref stack_map,
..
} in result.buffer.stack_maps()
{
stack_maps.add_stack_map(offset_end, stack_map.clone());
}
info
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
sink.info
}
sink.info
}
/// Creates unwind information for the function.
@ -460,6 +473,7 @@ impl Context {
Ok(build_value_labels_ranges::<ComparableSourceLoc>(
&self.func,
&self.regalloc,
self.mach_compile_result.as_ref(),
isa,
))
}


@ -14,12 +14,22 @@ pub enum AtomicRmwOp {
Sub,
/// And
And,
/// Nand
Nand,
/// Or
Or,
/// Xor
Xor,
/// Exchange
Xchg,
/// Unsigned min
Umin,
/// Unsigned max
Umax,
/// Signed min
Smin,
/// Signed max
Smax,
}
impl Display for AtomicRmwOp {
@ -28,9 +38,14 @@ impl Display for AtomicRmwOp {
AtomicRmwOp::Add => "add",
AtomicRmwOp::Sub => "sub",
AtomicRmwOp::And => "and",
AtomicRmwOp::Nand => "nand",
AtomicRmwOp::Or => "or",
AtomicRmwOp::Xor => "xor",
AtomicRmwOp::Xchg => "xchg",
AtomicRmwOp::Umin => "umin",
AtomicRmwOp::Umax => "umax",
AtomicRmwOp::Smin => "smin",
AtomicRmwOp::Smax => "smax",
};
f.write_str(s)
}
@ -43,9 +58,14 @@ impl FromStr for AtomicRmwOp {
"add" => Ok(AtomicRmwOp::Add),
"sub" => Ok(AtomicRmwOp::Sub),
"and" => Ok(AtomicRmwOp::And),
"nand" => Ok(AtomicRmwOp::Nand),
"or" => Ok(AtomicRmwOp::Or),
"xor" => Ok(AtomicRmwOp::Xor),
"xchg" => Ok(AtomicRmwOp::Xchg),
"umin" => Ok(AtomicRmwOp::Umin),
"umax" => Ok(AtomicRmwOp::Umax),
"smin" => Ok(AtomicRmwOp::Smin),
"smax" => Ok(AtomicRmwOp::Smax),
_ => Err(()),
}
}
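
A minimal sketch exercising the parse/print roundtrip of the new variants (the test wrapper is illustrative, not part of this diff):

#[test]
fn atomic_rmw_op_roundtrip() {
    use core::str::FromStr;
    // "umin" is one of the variants added above.
    let op = AtomicRmwOp::from_str("umin").unwrap();
    assert!(matches!(op, AtomicRmwOp::Umin));
    // Display mirrors FromStr, so the name roundtrips.
    assert_eq!(op.to_string(), "umin");
}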


@ -19,12 +19,16 @@ use core::slice::Iter;
use core::str::{from_utf8, FromStr};
use cranelift_entity::EntityRef;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// This type describes the actual constant data. Note that the bytes stored in this structure are
/// expected to be in little-endian order; this is due to ease-of-use when interacting with
/// WebAssembly values, which are [little-endian by design].
///
/// [little-endian by design]: https://github.com/WebAssembly/design/blob/master/Portability.md
#[derive(Clone, Hash, Eq, PartialEq, Debug, Default)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantData(Vec<u8>);
impl FromIterator<u8> for ConstantData {
@ -173,6 +177,7 @@ pub type ConstantOffset = u32;
/// from the beginning of the function is known (see
/// `relaxation` in `relaxation.rs`).
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantPoolEntry {
data: ConstantData,
offset: Option<ConstantOffset>,
@ -197,6 +202,7 @@ impl ConstantPoolEntry {
/// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and
/// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)).
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantPool {
/// This mapping maintains the insertion order as long as Constants are created with
/// sequentially increasing integers.


@ -21,6 +21,9 @@ use core::mem;
use core::ops::{Index, IndexMut};
use core::u16;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// A data flow graph defines all instructions and basic blocks in a function as well as
/// the data flow dependencies between them. The DFG also tracks values which can be either
/// instruction results or block parameters.
@ -29,6 +32,7 @@ use core::u16;
/// `Layout` data structure which forms the other half of the function representation.
///
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct DataFlowGraph {
/// Data about all of the instructions in the function, including opcodes and operands.
/// The instructions in this map are not in program order. That is tracked by `Layout`, along
@ -416,6 +420,7 @@ impl ValueDef {
/// Internal table storage for extended values.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
enum ValueData {
/// Value is defined by an instruction.
Inst { ty: Type, num: u16, inst: Inst },
@ -935,6 +940,7 @@ impl DataFlowGraph {
/// branches to this block must provide matching arguments, and the arguments to the entry block must
/// match the function arguments.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
struct BlockData {
/// List of parameters to this block.
params: ValueList,


@ -33,6 +33,7 @@ use serde::{Deserialize, Serialize};
///
/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Block(u32);
entity_impl!(Block, "block");
@ -65,6 +66,7 @@ impl Block {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Value(u32);
entity_impl!(Value, "v");
@ -97,6 +99,7 @@ impl Value {
///
/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Inst(u32);
entity_impl!(Inst, "inst");
@ -152,6 +155,7 @@ impl StackSlot {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct GlobalValue(u32);
entity_impl!(GlobalValue, "gv");
@ -177,6 +181,7 @@ impl GlobalValue {
/// While the order is stable, it is arbitrary and does not necessarily resemble the order in which
/// the constants are written in the constant pool.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Constant(u32);
entity_impl!(Constant, "const");
@ -202,6 +207,7 @@ impl Constant {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Immediate(u32);
entity_impl!(Immediate, "imm");
@ -267,6 +273,7 @@ impl JumpTable {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct FuncRef(u32);
entity_impl!(FuncRef, "fn");
@ -298,6 +305,7 @@ impl FuncRef {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct SigRef(u32);
entity_impl!(SigRef, "sig");
@ -323,6 +331,7 @@ impl SigRef {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Heap(u32);
entity_impl!(Heap, "heap");
@ -349,6 +358,7 @@ impl Heap {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Table(u32);
entity_impl!(Table, "table");
@ -367,6 +377,7 @@ impl Table {
/// An opaque reference to any of the entities defined in this module that can appear in CLIF IR.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum AnyEntity {
/// The whole function.
Function,


@ -256,8 +256,19 @@ impl fmt::Display for AbiParam {
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
/// On some architectures, small integer function arguments and/or return values are extended to
/// the width of a general-purpose register.
///
/// This attribute specifies how an argument or return value should be extended *if the platform
/// and ABI require it*. Because the frontend (CLIF generator) does not know anything about the
/// particulars of the target's ABI, and the CLIF should be platform-independent, these attributes
/// specify *how* to extend (according to the signedness of the original program) rather than
/// *whether* to extend.
///
/// For example, on x86-64, the SystemV ABI does not require extensions of narrow values, so these
/// `ArgumentExtension` attributes are ignored; but in the Baldrdash (SpiderMonkey) ABI on the same
/// platform, all narrow values *are* extended, so these attributes may lead to extra
/// zero/sign-extend instructions in the generated machine code.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ArgumentExtension {
@ -398,6 +409,7 @@ impl FromStr for ArgumentPurpose {
///
/// Information about a function that can be called directly with a direct `call` instruction.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ExtFuncData {
/// Name of the external function.
pub name: ExternalName,
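
To make the extension semantics above concrete, a hedged sketch of the frontend side: the CLIF producer records *how* a narrow value would be extended, and the target ABI later decides *whether* to act on it (ignored under SystemV on x86-64, honored under Baldrdash). The AbiParam::uext()/sext() builder methods are assumed from the public ir API and are not part of this diff.

use cranelift_codegen::ir::{types, AbiParam, Signature};
use cranelift_codegen::isa::CallConv;

fn narrow_arg_signature() -> Signature {
    let mut sig = Signature::new(CallConv::SystemV);
    // Request zero-extension *if* the target ABI extends narrow arguments.
    sig.params.push(AbiParam::new(types::I8).uext());
    sig.returns.push(AbiParam::new(types::I32));
    sig
}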


@ -9,6 +9,9 @@ use core::cmp;
use core::fmt::{self, Write};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
const TESTCASE_NAME_LENGTH: usize = 16;
/// The name of an external is either a reference to a user-defined symbol
@ -23,6 +26,7 @@ const TESTCASE_NAME_LENGTH: usize = 16;
/// In particular, many `.clif` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ExternalName {
/// A name in a user-defined symbol table. Cranelift does not interpret
/// these numbers in any way.


@ -18,15 +18,63 @@ use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::{EntryRegDiversions, RegDiversions};
use crate::value_label::ValueLabelsRanges;
use crate::write::write_function;
#[cfg(feature = "enable-serde")]
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
/// A function.
#[cfg(feature = "enable-serde")]
use serde::de::{Deserializer, Error};
#[cfg(feature = "enable-serde")]
use serde::ser::Serializer;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// A version marker used to ensure that serialized clif ir is never deserialized with a
/// different version of Cranelift.
#[derive(Copy, Clone, Debug)]
pub struct VersionMarker;
#[cfg(feature = "enable-serde")]
impl Serialize for VersionMarker {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
crate::VERSION.serialize(serializer)
}
}
#[cfg(feature = "enable-serde")]
impl<'de> Deserialize<'de> for VersionMarker {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let version = String::deserialize(deserializer)?;
if version != crate::VERSION {
return Err(D::Error::custom(&format!(
"Expected a clif ir function for version {}, found one for version {}",
crate::VERSION,
version,
)));
}
Ok(VersionMarker)
}
}
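
A hedged sketch of this guard in action, using the optional bincode dependency declared in the Cargo.toml diff above (illustrative usage, not part of this diff): serializing a VersionMarker writes the running crate's VERSION string, and deserializing fails unless it matches.

#[cfg(feature = "enable-serde")]
fn version_marker_roundtrip() {
    let bytes = bincode::serialize(&VersionMarker).unwrap();
    // Same crate, same VERSION: the marker deserializes cleanly.
    let _marker: VersionMarker = bincode::deserialize(&bytes).unwrap();
    // A build with a different crate::VERSION would get Err(..) here instead.
}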
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Function {
/// A version marker used to ensure that serialized clif ir is never deserialized with a
/// different version of Cranelift.
// Note: This must be the first field to ensure that Serde will deserialize it before
// attempting to deserialize other fields that are potentially changed between versions.
pub version_marker: VersionMarker,
/// Name of this function. Mostly used by `.clif` files.
pub name: ExternalName,
@ -109,6 +157,7 @@ impl Function {
/// Create a function with the given name and signature.
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
Self {
version_marker: VersionMarker,
name,
signature: sig,
old_signature: None,


@ -6,8 +6,12 @@ use crate::isa::TargetIsa;
use crate::machinst::RelocDistance;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a global value declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum GlobalValueData {
/// Value is the address of the VM context struct.
VMContext,


@ -4,8 +4,12 @@ use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a heap declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct HeapData {
/// The address of the start of the heap's storage.
pub base: GlobalValue,
@ -26,6 +30,7 @@ pub struct HeapData {
/// Style of heap including style-specific information.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum HeapStyle {
/// A dynamic heap can be relocated to a different base address when it is grown.
Dynamic {


@ -48,6 +48,7 @@ impl IntoBytes for Vec<u8> {
/// An `Imm64` operand can also be used to represent immediate values of smaller integer types by
/// sign-extending to `i64`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Imm64(i64);
impl Imm64 {
@ -148,6 +149,7 @@ impl FromStr for Imm64 {
/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by
/// zero-extending to `i64`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Uimm64(u64);
impl Uimm64 {
@ -279,6 +281,7 @@ pub type Uimm8 = u8;
///
/// This is used to represent sizes of memory objects.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Uimm32(u32);
impl Into<u32> for Uimm32 {
@ -362,6 +365,7 @@ impl From<&[u8]> for V128Imm {
/// This is used to encode an immediate offset for load/store instructions. All supported ISAs have
/// a maximum load/store offset that fits in an `i32`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Offset32(i32);
impl Offset32 {
@ -451,6 +455,7 @@ impl FromStr for Offset32 {
///
/// All bit patterns are allowed.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
#[repr(C)]
pub struct Ieee32(u32);
@ -459,6 +464,7 @@ pub struct Ieee32(u32);
///
/// All bit patterns are allowed.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
#[repr(C)]
pub struct Ieee64(u64);


@ -13,6 +13,9 @@ use core::num::NonZeroU32;
use core::ops::{Deref, DerefMut};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
use crate::isa;


@ -8,10 +8,14 @@ use alloc::vec::Vec;
use core::fmt::{self, Display, Formatter};
use core::slice::{Iter, IterMut};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are densely populated.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct JumpTableData {
// Table entries.
table: Vec<Block>,
@ -64,6 +68,11 @@ impl JumpTableData {
pub fn iter_mut(&mut self) -> IterMut<Block> {
self.table.iter_mut()
}
/// Clears all entries in this jump table.
pub fn clear(&mut self) {
self.table.clear();
}
}
impl Display for JumpTableData {


@ -781,6 +781,97 @@ impl<'f> DoubleEndedIterator for Insts<'f> {
}
}
/// A custom serialize and deserialize implementation for [`Layout`].
///
/// This doesn't use a derived implementation as [`Layout`] is a manual implementation of a linked
/// list. Storing it directly as a regular list saves a lot of space.
///
/// The following format is used (notated in EBNF form):
///
/// ```plain
/// data = block_data * ;
/// block_data = "block_id" , "inst_count" , ( "inst_id" * ) ;
/// ```
#[cfg(feature = "enable-serde")]
mod serde {
use ::serde::de::{Deserializer, Error, SeqAccess, Visitor};
use ::serde::ser::{SerializeSeq, Serializer};
use ::serde::{Deserialize, Serialize};
use core::convert::TryFrom;
use core::fmt;
use core::marker::PhantomData;
use super::*;
impl Serialize for Layout {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let size = self.blocks().count() * 2
+ self
.blocks()
.map(|block| self.block_insts(block).count())
.sum::<usize>();
let mut seq = serializer.serialize_seq(Some(size))?;
for block in self.blocks() {
seq.serialize_element(&block)?;
seq.serialize_element(&u32::try_from(self.block_insts(block).count()).unwrap())?;
for inst in self.block_insts(block) {
seq.serialize_element(&inst)?;
}
}
seq.end()
}
}
impl<'de> Deserialize<'de> for Layout {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_seq(LayoutVisitor {
marker: PhantomData,
})
}
}
struct LayoutVisitor {
marker: PhantomData<fn() -> Layout>,
}
impl<'de> Visitor<'de> for LayoutVisitor {
type Value = Layout;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a `cranelift_codegen::ir::Layout`")
}
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
{
let mut layout = Layout::new();
while let Some(block) = access.next_element::<Block>()? {
layout.append_block(block);
let count = access
.next_element::<u32>()?
.ok_or_else(|| Error::missing_field("count"))?;
for _ in 0..count {
let inst = access
.next_element::<Inst>()?
.ok_or_else(|| Error::missing_field("inst"))?;
layout.append_inst(inst, block);
}
}
Ok(layout)
}
}
}
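
As a concrete reading of the EBNF above (an illustration, not part of this diff): a layout with block0 holding inst0 and inst1, followed by block1 holding inst2, serializes as the flat sequence

    block0, 2, inst0, inst1, block1, 1, inst2

where each block id is followed by a u32 instruction count and then that many instruction ids.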
#[cfg(test)]
mod tests {
use super::Layout;


@ -63,6 +63,7 @@ pub enum LibCall {
/// Elf __tls_get_addr
ElfTlsGetAddr,
// When adding a new variant make sure to add it to `all_libcalls` too.
}
impl fmt::Display for LibCall {
@ -136,6 +137,33 @@ impl LibCall {
_ => return None,
})
}
/// Get a list of all known `LibCall`'s.
pub fn all_libcalls() -> &'static [LibCall] {
use LibCall::*;
&[
Probestack,
UdivI64,
SdivI64,
UremI64,
SremI64,
IshlI64,
UshrI64,
SshrI64,
CeilF32,
CeilF64,
FloorF32,
FloorF64,
TruncF32,
TruncF64,
NearestF32,
NearestF64,
Memcpy,
Memset,
Memmove,
ElfTlsGetAddr,
]
}
}
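
A small hedged sketch of why an embedder might want this list, e.g. to iterate every known libcall when wiring up native implementations (the wrapper function is hypothetical; the Display impl is the one defined earlier in this file):

fn dump_libcalls() {
    for lc in LibCall::all_libcalls() {
        // Uses the Display impl defined earlier in this file.
        println!("known libcall: {}", lc);
    }
}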
/// Get a function reference for `libcall` in `func`, following the signature


@ -2,20 +2,40 @@
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
enum FlagBit {
Notrap,
Aligned,
Readonly,
LittleEndian,
BigEndian,
}
const NAMES: [&str; 3] = ["notrap", "aligned", "readonly"];
const NAMES: [&str; 5] = ["notrap", "aligned", "readonly", "little", "big"];
/// Endianness of a memory access.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Endianness {
/// Little-endian
Little,
/// Big-endian
Big,
}
/// Flags for memory operations like load/store.
///
/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program do not change when a flag is removed, but adding a flag may.
///
/// In addition, the flags determine the endianness of the memory access. By default,
/// any memory access uses the native endianness determined by the target ISA. This can
/// be overridden for individual accesses by explicitly specifying little- or big-endian
/// semantics via the flags.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct MemFlags {
bits: u8,
}
@ -48,16 +68,48 @@ impl MemFlags {
/// Set a flag bit by name.
///
/// Returns true if the flag was found and set, false for an unknown flag name.
/// Will also return false when trying to set inconsistent endianness flags.
pub fn set_by_name(&mut self, name: &str) -> bool {
match NAMES.iter().position(|&s| s == name) {
Some(bit) => {
self.bits |= 1 << bit;
true
let bits = self.bits | 1 << bit;
if (bits & (1 << FlagBit::LittleEndian as usize)) != 0
&& (bits & (1 << FlagBit::BigEndian as usize)) != 0
{
false
} else {
self.bits = bits;
true
}
}
None => false,
}
}
/// Return endianness of the memory access. This will return the endianness
/// explicitly specified by the flags if any, and will default to the native
/// endianness otherwise. The native endianness has to be provided by the
/// caller since it is not explicitly encoded in CLIF IR -- this allows a
/// front end to create IR without having to know the target endianness.
pub fn endianness(self, native_endianness: Endianness) -> Endianness {
if self.read(FlagBit::LittleEndian) {
Endianness::Little
} else if self.read(FlagBit::BigEndian) {
Endianness::Big
} else {
native_endianness
}
}
/// Set endianness of the memory access.
pub fn set_endianness(&mut self, endianness: Endianness) {
match endianness {
Endianness::Little => self.set(FlagBit::LittleEndian),
Endianness::Big => self.set(FlagBit::BigEndian),
};
assert!(!(self.read(FlagBit::LittleEndian) && self.read(FlagBit::BigEndian)));
}
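
A minimal sketch of the new endianness API (MemFlags::new() is assumed from the existing interface; everything else is defined above): an explicit flag wins over the native endianness supplied by the backend, and inconsistent flags are rejected.

fn endianness_example() {
    let mut flags = MemFlags::new();
    flags.set_endianness(Endianness::Little);
    // An explicit flag overrides whatever the target's native order is.
    assert_eq!(flags.endianness(Endianness::Big), Endianness::Little);
    // set_by_name refuses to combine "little" and "big" on one access.
    assert!(!flags.set_by_name("big"));
}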
/// Test if the `notrap` flag is set.
///
/// Normally, trapping is part of the semantics of a load/store operation. If the platform


@ -50,7 +50,7 @@ pub use crate::ir::instructions::{
pub use crate::ir::jumptable::JumpTableData;
pub use crate::ir::layout::Layout;
pub use crate::ir::libcall::{get_probestack_funcref, LibCall};
pub use crate::ir::memflags::MemFlags;
pub use crate::ir::memflags::{Endianness, MemFlags};
pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use crate::ir::sourceloc::SourceLoc;
pub use crate::ir::stackslot::{StackLayoutInfo, StackSlotData, StackSlotKind, StackSlots};
@ -58,6 +58,7 @@ pub use crate::ir::table::TableData;
pub use crate::ir::trapcode::TrapCode;
pub use crate::ir::types::Type;
pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc};
pub use crate::value_label::LabelValueLoc;
pub use cranelift_codegen_shared::condcodes;
use crate::binemit;
@ -90,6 +91,7 @@ entity_impl!(ValueLabel, "val");
/// A label of a Value.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ValueLabelStart {
/// Source location when it is in effect
pub from: SourceLoc,
@ -100,6 +102,7 @@ pub struct ValueLabelStart {
/// Value label assignments: label starts or value aliases.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ValueLabelAssignments {
/// Original value labels assigned at transform.
Starts(alloc::vec::Vec<ValueLabelStart>),


@ -4,8 +4,12 @@ use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a table declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct TableData {
/// Global value giving the address of the start of the table.
pub base_gv: GlobalValue,


@ -4,15 +4,18 @@ use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::MemFlags;
use crate::ir::Opcode;
use crate::ir::{ExternalName, LibCall};
use crate::isa;
use crate::isa::aarch64::{inst::EmitState, inst::*};
use crate::isa::unwind::UnwindInst;
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.
@ -76,7 +79,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ir::types::I64,
param.extension,
@ -85,7 +88,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ir::types::I64,
param.extension,
@ -95,7 +98,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -105,7 +108,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -159,6 +162,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@ -167,6 +171,21 @@ impl ABIMachineSpec for AArch64MachineDeps {
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
//
// MacOS aarch64 is slightly different, see also
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
// We are diverging from the MacOS aarch64 implementation in the
// following ways:
// - sign- and zero- extensions of data types less than 32 bits are not
// implemented yet.
// - i128 arguments passing isn't implemented yet in the standard (non
// MacOS) aarch64 ABI.
// - we align the argument stack space to a 16-byte boundary, while
// MacOS requires only 8-byte alignment. In practice this means we're
// slightly overallocating when calling, which is fine, and doesn't
// break our other invariant that the stack is always allocated in
// 16-byte chunks.
let mut next_xreg = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
@ -208,7 +227,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::CalleeTLS => {}
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
@ -220,7 +241,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
"Invalid type for AArch64: {:?}",
param.value_type
);
let rc = Inst::rc_for_type(param.value_type).unwrap();
let (rcs, _) = Inst::rc_for_type(param.value_type).unwrap();
assert!(rcs.len() == 1, "Multi-reg values not supported yet");
let rc = rcs[0];
let next_reg = match rc {
RegClass::I64 => &mut next_xreg,
@ -231,13 +254,23 @@ impl ABIMachineSpec for AArch64MachineDeps {
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(rc == RegClass::I64);
ret.push(param);
} else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
} else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
let reg = match rc {
RegClass::I64 => xreg(*next_reg),
RegClass::V128 => vreg(*next_reg),
_ => unreachable!(),
};
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
@ -246,14 +279,25 @@ impl ABIMachineSpec for AArch64MachineDeps {
*next_reg += 1;
remaining_reg_vals -= 1;
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
// Compute the stack slot's size.
let size = (ty_bits(param.value_type) / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
let size = if call_conv != isa::CallConv::AppleAarch64 {
// Every arg takes a minimum slot of 8 bytes. (16-byte stack
// alignment happens separately after all args.)
std::cmp::max(size, 8)
} else {
// MacOS aarch64 allows stack slots with sizes less than 8
// bytes. They still need to be properly aligned on their
// natural data alignment, though.
size
};
// Align the stack slot.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack = align_to(next_stack, size);
ret.push(ABIArg::stack(
next_stack as i64,
param.value_type,
param.extension,
@ -270,14 +314,14 @@ impl ABIMachineSpec for AArch64MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
xreg(next_xreg).to_real_reg(),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
next_stack as i64,
I64,
ir::ArgumentExtension::None,
@ -290,7 +334,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
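// Editorial aside (hedged): `align_to` is assumed to round its first
// argument up to the given power-of-two boundary, i.e. to behave like
//     fn align_to(x: u64, align: u64) -> u64 { (x + align - 1) & !(align - 1) }
// replacing the open-coded `(next_stack + 15) & !15` from the old line above.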
// To avoid overflow issues, limit the arg/return size to something
// reasonable -- here, 128 MB.
@ -345,7 +389,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::Ret
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let imm = imm as u64;
let mut insts = SmallVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm) {
@ -370,7 +414,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::SubS64,
@ -411,7 +455,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
if amount == 0 {
return SmallVec::new();
}
@ -455,8 +499,17 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::Aarch64SetPointerAuth {
return_addresses: false,
},
});
}
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
@ -467,6 +520,15 @@ impl ABIMachineSpec for AArch64MachineDeps {
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
// mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
// the usual encoding (`ORR`) does not work with SP.
insts.push(Inst::AluRRImm12 {
@ -481,20 +543,14 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
// MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: fp_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
// N.B.: sp is already adjusted to the appropriate place by the
// clobber-restore code (which also frees the fixed frame). Hence, there
// is no need for the usual `mov sp, fp` here.
// `ldp fp, lr, [sp], #16`
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
@ -504,29 +560,56 @@ impl ABIMachineSpec for AArch64MachineDeps {
),
flags: MemFlags::trusted(),
});
insts
}
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
}
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32;
insts.extend(Self::gen_sp_reg_adjust(
-(total_save_bytes + fixed_frame_storage_size as i32),
));
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
if flags.unwind_info() {
// The *unwind* frame (but not the actual frame) starts at the
// clobbers, just below the saved FP/LR pair.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobber_size as u32,
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
// We use pre-indexed addressing modes here, rather than the possibly
// more efficient "subtract sp once then use fixed offsets" scheme,
// because (i) we cannot necessarily guarantee that the offset of a
// clobber-save slot will be within a SImm7Scaled (+504-byte) offset
// range of the whole frame including other slots, (ii) it is more
// complex to conditionally generate a two-stage SP adjustment (clobbers
// then fixed frame) otherwise, and (iii) generally we just want to
// maintain simplicity here for maintainability. Because clobbers are at the top of the
// frame, just below FP, all that is necessary is to use the pre-indexed
// "push" `[sp, #-16]!` addressing mode.
//
// `clobber_offset` tracks the offset above the start of clobbers for
// unwind-info purposes.
let mut clobber_offset = clobber_size as u32;
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
@ -537,28 +620,56 @@ impl ABIMachineSpec for AArch64MachineDeps {
debug_assert!(r1.get_class() == RegClass::I64);
debug_assert!(r2.get_class() == RegClass::I64);
// stp r1, r2, [sp, #(i * #16)]
// stp r1, r2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: r1,
rt2: r2,
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 8;
if r2 != zero_reg() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r2.to_real_reg(),
},
});
}
clobber_offset -= 8;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r1.to_real_reg(),
},
});
}
}
let vec_offset = int_save_bytes;
for (i, reg) in clobbered_vec.iter().enumerate() {
for reg in clobbered_vec.iter() {
insts.push(Inst::FpuStore128 {
rd: reg.to_reg().to_reg(),
mem: AMode::Unscaled(
stack_reg(),
SImm9::maybe_from_i64((vec_offset + (i * 16)) as i64).unwrap(),
),
mem: AMode::PreIndexed(writable_stack_reg(), SImm9::maybe_from_i64(-16).unwrap()),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 16;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: reg.to_reg(),
},
});
}
}
// Allocate the fixed frame below the clobbers if necessary.
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)));
}
(total_save_bytes as u64, insts)
@ -568,14 +679,25 @@ impl ABIMachineSpec for AArch64MachineDeps {
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
fixed_frame_storage_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
// Free the fixed frame if necessary.
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
}
for reg in clobbered_vec.iter().rev() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(),
});
}
for reg_pair in clobbered_int.chunks(2).rev() {
let (r1, r2) = if reg_pair.len() == 2 {
(
reg_pair[0].map(|r| r.to_reg()),
@ -588,37 +710,18 @@ impl ABIMachineSpec for AArch64MachineDeps {
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
// ldp r1, r2, [sp, #(i * 16)]
// ldp r1, r2, [sp], #16
insts.push(Inst::LoadP64 {
rt: r1,
rt2: r2,
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
),
flags: MemFlags::trusted(),
});
}
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: AMode::Unscaled(
stack_reg(),
SImm9::maybe_from_i64(((i * 16) + int_save_bytes) as i64).unwrap(),
),
flags: MemFlags::trusted(),
});
}
// For non-baldrdash calling conventions, the frame pointer
// will be moved into the stack pointer in the epilogue, so we
// can skip restoring the stack pointer value with this `add`.
if call_conv.extends_baldrdash() {
let total_save_bytes = (int_save_bytes + vec_save_bytes) as i32;
insts.extend(Self::gen_sp_reg_adjust(total_save_bytes));
}
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
// register. We may have allocated it for something else and clobbered
// it, but the ABI expects us to leave the TLS register unchanged.
@ -700,6 +803,34 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_memcpy(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
size: usize,
) -> SmallVec<[Self::I; 8]> {
// Baldrdash should not use struct args.
assert!(!call_conv.extends_baldrdash());
let mut insts = SmallVec::new();
let arg0 = writable_xreg(0);
let arg1 = writable_xreg(1);
let arg2 = writable_xreg(2);
insts.push(Inst::gen_move(arg0, dst, I64));
insts.push(Inst::gen_move(arg1, src, I64));
insts.extend(Inst::load_constant(arg2, size as u64).into_iter());
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Memcpy),
uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()],
defs: Self::get_regs_clobbered_by_call(call_conv),
opcode: Opcode::Call,
caller_callconv: call_conv,
callee_callconv: call_conv,
}),
});
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
@ -736,6 +867,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
caller_saved
}
fn get_ext_mode(
call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
if call_conv.extends_baldrdash() {
// Baldrdash (SpiderMonkey) always extends args and return values to the full register.
specified
} else {
// No other supported ABI on AArch64 does so.
ir::ArgumentExtension::None
}
}
}
/// Is this type supposed to be seen on this machine? E.g. references of the

View file

@ -3,7 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, MachLabel};
@ -209,6 +209,19 @@ impl AMode {
pub fn label(label: MemLabel) -> AMode {
AMode::Label(label)
}
/// Does the address resolve to just a register value, with no offset or
/// other computation?
pub fn is_reg(&self) -> Option<Reg> {
match self {
&AMode::UnsignedOffset(r, uimm12) if uimm12.value() == 0 => Some(r),
&AMode::Unscaled(r, imm9) if imm9.value() == 0 => Some(r),
&AMode::RegOffset(r, off, _) if off == 0 => Some(r),
&AMode::FPOffset(off, _) if off == 0 => Some(fp_reg()),
&AMode::SPOffset(off, _) if off == 0 => Some(stack_reg()),
_ => None,
}
}
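// Editorial sketch (not part of the patch): this feeds the new
// `Inst::gen_load_addr` helper added later in this patch, which skips
// address materialization for bare-register amodes, e.g.:
//
//     assert_eq!(AMode::FPOffset(0, I64).is_reg(), Some(fp_reg()));
//     assert_eq!(AMode::FPOffset(16, I64).is_reg(), None);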
}
/// A memory argument to a load/store-pair.
@ -588,6 +601,14 @@ impl ScalarSize {
}
}
/// Convert from an integer operand size.
pub fn from_operand_size(size: OperandSize) -> ScalarSize {
match size {
OperandSize::Size32 => ScalarSize::Size32,
OperandSize::Size64 => ScalarSize::Size64,
}
}
/// Convert from a type into the smallest size that fits.
pub fn from_ty(ty: Type) -> ScalarSize {
Self::from_bits(ty_bits(ty))

View file

@ -258,10 +258,6 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
| machreg_to_vec(rt.to_reg())
}
fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
(top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
(top11 << 21)
| (machreg_to_vec(rm) << 16)
@ -313,6 +309,12 @@ fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
| (cond.invert().bits() << 12)
}
fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
0b110_11010100_11111_0000_00_11111_00000
| machreg_to_gpr(rd.to_reg())
| (cond.invert().bits() << 12)
}
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
0b0_1_1_11010010_00000_0000_10_00000_0_0000
| size.sf_bit() << 31
@ -322,6 +324,29 @@ fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond)
| nzcv.bits()
}
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
match size {
OperandSize::Size64 => {
debug_assert!(immr <= 63);
debug_assert!(imms <= 63);
}
OperandSize::Size32 => {
debug_assert!(immr <= 31);
debug_assert!(imms <= 31);
}
}
debug_assert_eq!(opc & 0b11, opc);
let n_bit = size.sf_bit();
0b0_00_100110_0_000000_000000_00000_00000
| size.sf_bit() << 31
| u32::from(opc) << 29
| n_bit << 22
| u32::from(immr) << 16
| u32::from(imms) << 10
| machreg_to_gpr(rn) << 5
| machreg_to_gpr(rd.to_reg())
}
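// Editorial sketch (not part of the patch): the reworked `Inst::Extend`
// lowering below funnels sign/zero extensions into this BFM family with
// immr = 0 and imms = from_bits - 1; a signed 1-bit extend to 64 bits is
//
//     sink.put4(enc_bfm(0b00, OperandSize::Size64, rd, rn, 0, 0));
//
// i.e. `sbfx x1, x2, #0, #1`, the "41004093" case in the binemit tests.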
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
0b00001110_101_00000_00011_1_00000_00000
| ((is_16b as u32) << 30)
@ -437,6 +462,16 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
| machreg_to_gpr(rt)
}
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
debug_assert_eq!(size & 0b11, size);
0b00_0010001_1_1_00000_1_11111_00000_00000
| size << 30
| machreg_to_gpr(rs.to_reg()) << 16
| machreg_to_gpr(rn) << 5
| machreg_to_gpr(rt)
}
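// Editorial sketch (not part of the patch): the AtomicCAS emit case below
// derives `size` from the type, so an I64 `casal x7, x15, [x27]` becomes
//
//     sink.put4(enc_cas(0b11, rs /* x7 */, rt /* x15 */, rn /* x27 */));
//
// producing the 0xC8E7FF6F word checked as "6FFFE7C8" in the binemit tests.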
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
let abc = (imm >> 5) as u32;
let defgh = (imm & 0b11111) as u32;
@ -517,7 +552,6 @@ impl MachInstEmitInfo for EmitInfo {
impl MachInstEmit for Inst {
type State = EmitState;
type Info = EmitInfo;
type UnwindInfo = super::unwind::AArch64UnwindInfo;
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
// N.B.: we *must* not exceed the "worst-case size" used to compute
@ -1045,6 +1079,9 @@ impl MachInstEmit for Inst {
&Inst::CSet { rd, cond } => {
sink.put4(enc_cset(rd, cond));
}
&Inst::CSetm { rd, cond } => {
sink.put4(enc_csetm(rd, cond));
}
&Inst::CCmpImm {
size,
rn,
@ -1109,6 +1146,11 @@ impl MachInstEmit for Inst {
inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
inst_common::AtomicRmwOp::Nand
| inst_common::AtomicRmwOp::Umin
| inst_common::AtomicRmwOp::Umax
| inst_common::AtomicRmwOp::Smin
| inst_common::AtomicRmwOp::Smax => todo!("{:?}", op),
inst_common::AtomicRmwOp::Xchg => unreachable!(),
};
sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
@ -1132,7 +1174,18 @@ impl MachInstEmit for Inst {
sink.put4(enc_dmb_ish()); // dmb ish
}
&Inst::AtomicCAS { ty } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let size = match ty {
I8 => 0b00,
I16 => 0b01,
I32 => 0b10,
I64 => 0b11,
_ => panic!("Unsupported type: {}", ty),
};
sink.put4(enc_cas(size, rs, rt, rn));
}
&Inst::AtomicCASLoop { ty } => {
/* Emit this:
dmb ish
again:
@ -1264,7 +1317,7 @@ impl MachInstEmit for Inst {
sink.put4(enc_dmb_ish()); // dmb ish
}
&Inst::FpuMove64 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
}
&Inst::FpuMove128 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
@ -1284,6 +1337,13 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::FpuExtend { rd, rn, size } => {
sink.put4(enc_fpurr(
0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
rd,
rn,
));
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let top22 = match fpu_op {
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
@ -1428,12 +1488,18 @@ impl MachInstEmit for Inst {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b0, 0b11000, enc_size | 0b10)
}
VecMisc2::Cnt => {
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
(0b0, 0b00101, enc_size)
}
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}
&Inst::VecLanes { op, rd, rn, size } => {
let (q, size) = match size {
VectorSize::Size8x8 => (0b0, 0b00),
VectorSize::Size8x16 => (0b1, 0b00),
VectorSize::Size16x4 => (0b0, 0b01),
VectorSize::Size16x8 => (0b1, 0b01),
VectorSize::Size32x4 => (0b1, 0b10),
_ => unreachable!(),
@ -1718,6 +1784,17 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecDupFPImm { rd, imm, size } => {
let imm = imm.enc_bits();
let op = match size.lane_size() {
ScalarSize::Size32 => 0,
ScalarSize::Size64 => 1,
_ => unimplemented!(),
};
let q_op = op | ((size.is_128bits() as u32) << 1);
sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
}
&Inst::VecDupImm {
rd,
imm,
@ -1985,73 +2062,47 @@ impl MachInstEmit for Inst {
&Inst::Extend {
rd,
rn,
signed,
from_bits,
signed: false,
from_bits: 1,
to_bits,
} if from_bits >= 8 => {
let top22 = match (signed, from_bits, to_bits) {
(false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
(false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
(true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32)
(true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
// The 64-bit unsigned variants are the same as the 32-bit ones,
// because writes to Wn zero out the top 32 bits of Xn
(false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
(false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
(true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64)
(true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
// 32-to-64: the unsigned case is a 'mov' (special-cased below).
(false, 32, 64) => 0, // MOV
(true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
_ => panic!(
"Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
signed, from_bits, to_bits
),
};
if top22 != 0 {
sink.put4(enc_extend(top22, rd, rn));
} else {
Inst::mov32(rd, rn).emit(sink, emit_info, state);
}
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && signed => {
assert!(to_bits <= 64);
// Reduce sign-extend-from-1-bit to:
// - and rd, rn, #1
// - sub rd, zr, rd
// We don't have ImmLogic yet, so we just hardcode this. FIXME.
sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
let sub_inst = Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd,
rn: zero_reg(),
rm: rd.to_reg(),
};
sub_inst.emit(sink, emit_info, state);
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && !signed => {
} => {
assert!(to_bits <= 64);
// Reduce zero-extend-from-1-bit to:
// - and rd, rn, #1
// We don't have ImmLogic yet, so we just hardcode this. FIXME.
sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
// Note: This is special cased as UBFX may take more cycles
// than AND on smaller cores.
let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
Inst::AluRRImmLogic {
alu_op: ALUOp::And32,
rd,
rn,
imml,
}
.emit(sink, emit_info, state);
}
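// Editorial note (not part of the patch): `ImmLogic::maybe_from_u64(1, I32)`
// is the logical-immediate encoding of 0x1, so this arm emits
//
//     and wd, wn, #1
//
// ("A3000012" for rd = x3, rn = x5 in the binemit tests) for both 32- and
// 64-bit destinations, since the 32-bit AND already zeroes the upper half
// of the X register.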
&Inst::Extend { .. } => {
panic!("Unsupported extend variant");
&Inst::Extend {
rd,
rn,
signed: false,
from_bits: 32,
to_bits: 64,
} => {
let mov = Inst::Mov32 { rd, rm: rn };
mov.emit(sink, emit_info, state);
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} => {
let (opc, size) = if signed {
(0b00, OperandSize::from_bits(to_bits))
} else {
(0b10, OperandSize::Size32)
};
sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
}
&Inst::Jump { ref dest } => {
let off = sink.cur_offset();
@ -2293,7 +2344,7 @@ impl MachInstEmit for Inst {
add.emit(sink, emit_info, state);
} else if offset == 0 {
if reg != rd.to_reg() {
let mov = Inst::mov(rd, reg);
let mov = Inst::Mov64 { rd, rm: reg };
mov.emit(sink, emit_info, state);
}
@ -2345,6 +2396,13 @@ impl MachInstEmit for Inst {
sink.bind_label(jump_around_label);
}
}
&Inst::ValueLabelMarker { .. } => {
// Nothing; this is only used to compute debug info.
}
&Inst::Unwind { ref inst } => {
sink.add_unwind(inst.clone());
}
}
let end_off = sink.cur_offset();

View file

@ -1846,6 +1846,22 @@ fn test_aarch64_binemit() {
"EFB79F9A",
"cset x15, ge",
));
insns.push((
Inst::CSetm {
rd: writable_xreg(0),
cond: Cond::Eq,
},
"E0139FDA",
"csetm x0, eq",
));
insns.push((
Inst::CSetm {
rd: writable_xreg(16),
cond: Cond::Vs,
},
"F0739FDA",
"csetm x16, vs",
));
insns.push((
Inst::CCmpImm {
size: OperandSize::Size64,
@ -2056,6 +2072,24 @@ fn test_aarch64_binemit() {
"5205084E",
"dup v18.2d, v10.d[0]",
));
insns.push((
Inst::VecDupFPImm {
rd: writable_vreg(31),
imm: ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32).unwrap(),
size: VectorSize::Size32x2,
},
"1FF6030F",
"fmov v31.2s, #1",
));
insns.push((
Inst::VecDupFPImm {
rd: writable_vreg(0),
imm: ASIMDFPModImm::maybe_from_u64(2_f64.to_bits(), ScalarSize::Size64).unwrap(),
size: VectorSize::Size64x2,
},
"00F4006F",
"fmov v0.2d, #2",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(31),
@ -2066,16 +2100,96 @@ fn test_aarch64_binemit() {
"FFE7074F",
"movi v31.16b, #255",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(30),
imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size16).unwrap(),
invert: false,
size: VectorSize::Size16x8,
},
"1E84004F",
"movi v30.8h, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(0),
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size16),
invert: true,
size: VectorSize::Size16x4,
},
"0084002F",
"mvni v0.4h, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(0),
imm: ASIMDMovModImm::maybe_from_u64(256, ScalarSize::Size16).unwrap(),
invert: false,
size: VectorSize::Size16x8,
},
"20A4004F",
"movi v0.8h, #1, LSL #8",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(8),
imm: ASIMDMovModImm::maybe_from_u64(2228223, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x4,
},
"28D4014F",
"movi v8.4s, #33, MSL #16",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(16),
imm: ASIMDMovModImm::maybe_from_u64(35071, ScalarSize::Size32).unwrap(),
invert: true,
size: VectorSize::Size32x2,
},
"10C5042F",
"mvni v16.2s, #136, MSL #8",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(1),
imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x2,
},
"0104000F",
"movi v1.2s, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(24),
imm: ASIMDMovModImm::maybe_from_u64(1107296256, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x4,
},
"5864024F",
"movi v24.4s, #66, LSL #24",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(8),
imm: ASIMDMovModImm::zero(ScalarSize::Size64),
invert: false,
size: VectorSize::Size64x2,
},
"08E4006F",
"movi v8.2d, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(7),
imm: ASIMDMovModImm::maybe_from_u64(18374687574904995840, ScalarSize::Size64).unwrap(),
invert: false,
size: VectorSize::Size64x2,
},
"87E6046F",
"movi v7.2d, #18374687574904995840",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
@ -3678,6 +3792,28 @@ fn test_aarch64_binemit() {
"frintp v12.2d, v17.2d",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cnt,
rd: writable_vreg(23),
rn: vreg(5),
size: VectorSize::Size8x8,
},
"B758200E",
"cnt v23.8b, v5.8b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: writable_vreg(0),
rn: vreg(31),
size: VectorSize::Size8x8,
},
"E0AB312E",
"uminv b0, v31.8b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
@ -3722,6 +3858,17 @@ fn test_aarch64_binemit() {
"addv b2, v29.16b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
rd: writable_vreg(15),
rn: vreg(7),
size: VectorSize::Size16x4,
},
"EFB8710E",
"addv h15, v7.4h",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
@ -3952,6 +4099,50 @@ fn test_aarch64_binemit() {
"vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
));
insns.push((
Inst::Extend {
rd: writable_xreg(3),
rn: xreg(5),
signed: false,
from_bits: 1,
to_bits: 32,
},
"A3000012",
"and w3, w5, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(3),
rn: xreg(5),
signed: false,
from_bits: 1,
to_bits: 64,
},
"A3000012",
"and w3, w5, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(10),
rn: xreg(21),
signed: true,
from_bits: 1,
to_bits: 32,
},
"AA020013",
"sbfx w10, w21, #0, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),
rn: xreg(2),
signed: true,
from_bits: 1,
to_bits: 64,
},
"41004093",
"sbfx x1, x2, #0, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),
@ -4005,7 +4196,7 @@ fn test_aarch64_binemit() {
to_bits: 64,
},
"411C0053",
"uxtb x1, w2",
"uxtb w1, w2",
));
insns.push((
Inst::Extend {
@ -4027,7 +4218,7 @@ fn test_aarch64_binemit() {
to_bits: 64,
},
"413C0053",
"uxth x1, w2",
"uxth w1, w2",
));
insns.push((
Inst::Extend {
@ -4281,8 +4472,8 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(4),
},
"881CA40E",
"mov v8.8b, v4.8b",
"8840601E",
"fmov d8, d4",
));
insns.push((
@ -4316,6 +4507,16 @@ fn test_aarch64_binemit() {
"mov d23, v11.d[0]",
));
insns.push((
Inst::FpuExtend {
rd: writable_vreg(31),
rn: vreg(0),
size: ScalarSize::Size32,
},
"1F40201E",
"fmov s31, s0",
));
insns.push((
Inst::FpuRR {
fpu_op: FPUOp1::Abs32,
@ -5034,9 +5235,48 @@ fn test_aarch64_binemit() {
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(28),
rt: xreg(20),
rn: xreg(10),
ty: I8,
},
"54FDFC08",
"casalb w28, w20, [x10]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(2),
rt: xreg(19),
rn: xreg(23),
ty: I16,
},
"F3FEE248",
"casalh w2, w19, [x23]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(0),
rt: zero_reg(),
rn: stack_reg(),
ty: I32,
},
"FFFFE088",
"casal w0, wzr, [sp]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(7),
rt: xreg(15),
rn: xreg(27),
ty: I64,
},
"6FFFE7C8",
"casal x7, x15, [x27]",
));
insns.push((
Inst::AtomicCASLoop {
ty: I8,
},
"BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
@ -5044,7 +5284,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AtomicCAS {
Inst::AtomicCASLoop {
ty: I64,
},
"BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",

View file

@ -668,39 +668,208 @@ impl MoveWideConst {
}
/// Advanced SIMD modified immediate as used by MOVI/MVNI.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDMovModImm {
imm: u8,
shift: u8,
is_64bit: bool,
shift_ones: bool,
}
impl ASIMDMovModImm {
/// Construct an ASIMDMovModImm from an arbitrary 64-bit constant, if possible.
/// Note that the bits in `value` outside of the range specified by `size` are
/// ignored; for example, in the case of `ScalarSize::Size8` all bits above the
/// lowest 8 are ignored.
pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
match size {
ScalarSize::Size8 => Some(ASIMDMovModImm {
imm: value as u8,
shift: 0,
is_64bit: false,
shift_ones: false,
}),
ScalarSize::Size16 => {
let value = value as u16;
if value >> 8 == 0 {
Some(ASIMDMovModImm {
imm: value as u8,
shift: 0,
is_64bit: false,
shift_ones: false,
})
} else if value as u8 == 0 {
Some(ASIMDMovModImm {
imm: (value >> 8) as u8,
shift: 8,
is_64bit: false,
shift_ones: false,
})
} else {
None
}
}
ScalarSize::Size32 => {
let value = value as u32;
// Value is of the form 0x00MMFFFF.
if value & 0xFF00FFFF == 0x0000FFFF {
let imm = (value >> 16) as u8;
Some(ASIMDMovModImm {
imm,
shift: 16,
is_64bit: false,
shift_ones: true,
})
// Value is of the form 0x0000MMFF.
} else if value & 0xFFFF00FF == 0x000000FF {
let imm = (value >> 8) as u8;
Some(ASIMDMovModImm {
imm,
shift: 8,
is_64bit: false,
shift_ones: true,
})
} else {
// Of the 4 bytes, at most one is non-zero.
for shift in (0..32).step_by(8) {
if value & (0xFF << shift) == value {
return Some(ASIMDMovModImm {
imm: (value >> shift) as u8,
shift,
is_64bit: false,
shift_ones: false,
});
}
}
None
}
}
ScalarSize::Size64 => {
let mut imm = 0u8;
// Check if all bytes are either 0 or 0xFF.
for i in 0..8 {
let b = (value >> (i * 8)) as u8;
if b == 0 || b == 0xFF {
imm |= (b & 1) << i;
} else {
return None;
}
}
Some(ASIMDMovModImm {
imm,
shift: 0,
is_64bit: true,
shift_ones: false,
})
}
_ => None,
}
}
/// Create a zero immediate of this format.
pub fn zero() -> Self {
pub fn zero(size: ScalarSize) -> Self {
ASIMDMovModImm {
imm: 0,
shift: 0,
is_64bit: size == ScalarSize::Size64,
shift_ones: false,
}
}
/// Returns the value that this immediate represents.
pub fn value(&self) -> (u8, u32, bool) {
(self.imm, self.shift as u32, self.shift_ones)
}
}
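// Editorial sketch (not part of the patch): in the Size64 form each bit of
// `imm` selects an all-zeros or all-ones byte, so
//
//     ASIMDMovModImm::maybe_from_u64(0xFF00_00FF_00FF_0000, ScalarSize::Size64)
//
// yields imm = 0b1001_0100, the "movi v7.2d, #18374687574904995840" case in
// the binemit tests earlier in this patch.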
/// Advanced SIMD modified immediate as used by the vector variant of FMOV.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDFPModImm {
imm: u8,
is_64bit: bool,
}
impl ASIMDFPModImm {
/// Construct an ASIMDFPModImm from an arbitrary 64-bit constant, if possible.
pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDFPModImm> {
// In all cases immediates are encoded as an 8-bit number 0b_abcdefgh;
// let `B` be the inverse of the digit `b`.
match size {
ScalarSize::Size32 => {
// In this case the representable immediates are 32-bit numbers of the form
// 0b_aBbb_bbbc_defg_h000 shifted to the left by 16.
let value = value as u32;
let b0_5 = (value >> 19) & 0b111111;
let b6 = (value >> 19) & (1 << 6);
let b7 = (value >> 24) & (1 << 7);
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value32(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: false,
})
} else {
None
}
}
ScalarSize::Size64 => {
// In this case the representable immediates are 64-bit numbers of the form
// 0b_aBbb_bbbb_bbcd_efgh shifted to the left by 48.
let b0_5 = (value >> 48) & 0b111111;
let b6 = (value >> 48) & (1 << 6);
let b7 = (value >> 56) & (1 << 7);
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value64(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: true,
})
} else {
None
}
}
_ => None,
}
}
/// Returns bits ready for encoding.
pub fn enc_bits(&self) -> u8 {
self.imm
}
/// Returns the 32-bit value that corresponds to an 8-bit encoding.
fn value32(imm: u8) -> u32 {
let imm = imm as u32;
let b0_5 = imm & 0b111111;
let b6 = (imm >> 6) & 1;
let b6_inv = b6 ^ 1;
let b7 = (imm >> 7) & 1;
b0_5 << 19 | (b6 * 0b11111) << 25 | b6_inv << 30 | b7 << 31
}
/// Returns the 64-bit value that corresponds to an 8-bit encoding.
fn value64(imm: u8) -> u64 {
let imm = imm as u64;
let b0_5 = imm & 0b111111;
let b6 = (imm >> 6) & 1;
let b6_inv = b6 ^ 1;
let b7 = (imm >> 7) & 1;
b0_5 << 48 | (b6 * 0b11111111) << 54 | b6_inv << 62 | b7 << 63
}
}
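// Editorial sketch (not part of the patch): the round-trip through
// `value32`/`value64` is what guarantees exactness. For 1.0f32 (0x3F80_0000)
// the fields are a = 0, b = 1, cdefgh = 0b110000, so
//
//     ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32)
//
// yields Some(imm) with enc_bits() == 0b0111_0000, while a value such as
// 0.013671875_f32 does not fit the form and returns None (see the tests
// below).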
impl PrettyPrint for NZCV {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
@ -782,7 +951,20 @@ impl PrettyPrint for MoveWideConst {
impl PrettyPrint for ASIMDMovModImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.shift == 0 {
if self.is_64bit {
debug_assert_eq!(self.shift, 0);
let enc_imm = self.imm as i8;
let mut imm = 0u64;
for i in 0..8 {
let b = (enc_imm >> i) & 1;
imm |= (-b as u8 as u64) << (i * 8);
}
format!("#{}", imm)
} else if self.shift == 0 {
format!("#{}", self.imm)
} else {
let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
@ -791,6 +973,16 @@ impl PrettyPrint for ASIMDMovModImm {
}
}
impl PrettyPrint for ASIMDFPModImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.is_64bit {
format!("#{}", f64::from_bits(Self::value64(self.imm)))
} else {
format!("#{}", f32::from_bits(Self::value32(self.imm)))
}
}
}
#[cfg(test)]
mod test {
use super::*;
@ -1022,4 +1214,44 @@ mod test {
unreachable!();
}
}
#[test]
fn asimd_fp_mod_imm_test() {
assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size32));
assert_eq!(
None,
ASIMDFPModImm::maybe_from_u64(0.013671875_f32.to_bits() as u64, ScalarSize::Size32)
);
assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size64));
assert_eq!(
None,
ASIMDFPModImm::maybe_from_u64(10000_f64.to_bits(), ScalarSize::Size64)
);
}
#[test]
fn asimd_mov_mod_imm_test() {
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(513, ScalarSize::Size16)
);
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(4278190335, ScalarSize::Size32)
);
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(8388608, ScalarSize::Size64)
);
assert_eq!(
Some(ASIMDMovModImm {
imm: 66,
shift: 16,
is_64bit: false,
shift_ones: true,
}),
ASIMDMovModImm::maybe_from_u64(4390911, ScalarSize::Size32)
);
}
}

View file

@ -5,10 +5,10 @@
use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
};
use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type, ValueLabel};
use crate::isa::unwind::UnwindInst;
use crate::isa::CallConv;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
@ -332,6 +332,8 @@ pub enum VecMisc2 {
Frintm,
/// Floating point round to integral, rounding towards plus infinity
Frintp,
/// Population count per byte
Cnt,
}
/// A Vector narrowing operation with two registers.
@ -660,6 +662,12 @@ pub enum Inst {
cond: Cond,
},
/// A conditional-set-mask operation.
CSetm {
rd: Writable<Reg>,
cond: Cond,
},
/// A conditional comparison with an immediate.
CCmpImm {
size: OperandSize,
@ -688,19 +696,26 @@ pub enum Inst {
op: inst_common::AtomicRmwOp,
},
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
AtomicCAS {
rs: Writable<Reg>,
rt: Reg,
rn: Reg,
ty: Type,
},
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
/// store-conditional loop. (Although we could possibly implement it more directly using
/// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
/// both preceded and followed by a fence which is at least as comprehensive as that of the
/// `Fence` instruction below. This instruction is sequentially consistent. Note that the
/// operand conventions, although very similar to AtomicRMW, are different:
/// store-conditional loop. The sequence is both preceded and followed by a fence which is
/// at least as comprehensive as that of the `Fence` instruction below. This instruction
/// is sequentially consistent. Note that the operand conventions, although very similar
/// to AtomicRMW, are different:
///
/// x25 (rd) address
/// x26 (rd) expected value
/// x28 (rd) replacement value
/// x27 (wr) old value
/// x24 (wr) scratch reg; value afterwards has no meaning
AtomicCAS {
AtomicCASLoop {
ty: Type, // I8, I16, I32 or I64
},
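// Editorial sketch (not part of the patch): per the emit code in this
// patch the loop expands to roughly the following (the expected value in
// x26 is copied, and masked for I8/I16, into the x24 scratch before the
// compare):
//
//     dmb ish
//   again:
//     ldxr{b,h,} x27, [x25]
//     cmp x27, x24
//     b.ne out
//     stxr{b,h,} w24, x28, [x25]
//     cbnz w24, again
//   out:
//     dmb ish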
@ -748,6 +763,13 @@ pub enum Inst {
size: VectorSize,
},
/// Zero-extend a SIMD & FP scalar to the full width of a vector register.
FpuExtend {
rd: Writable<Reg>,
rn: Reg,
size: ScalarSize,
},
/// 1-op FPU instruction.
FpuRR {
fpu_op: FPUOp1,
@ -921,6 +943,13 @@ pub enum Inst {
size: VectorSize,
},
/// Duplicate FP immediate to vector.
VecDupFPImm {
rd: Writable<Reg>,
imm: ASIMDFPModImm,
size: VectorSize,
},
/// Duplicate immediate to vector.
VecDupImm {
rd: Writable<Reg>,
@ -1189,6 +1218,17 @@ pub enum Inst {
/// The needed space before the next deadline.
needed_space: CodeOffset,
},
/// A definition of a value label.
ValueLabelMarker {
reg: Reg,
label: ValueLabel,
},
/// An unwind pseudo-instruction.
Unwind {
inst: UnwindInst,
},
}
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
@ -1211,35 +1251,6 @@ fn inst_size_test() {
}
impl Inst {
/// Create a move instruction.
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
assert!(to_reg.to_reg().get_class() == from_reg.get_class());
if from_reg.get_class() == RegClass::I64 {
Inst::Mov64 {
rd: to_reg,
rm: from_reg,
}
} else if from_reg.get_class() == RegClass::V128 {
Inst::FpuMove128 {
rd: to_reg,
rn: from_reg,
}
} else {
Inst::FpuMove64 {
rd: to_reg,
rn: from_reg,
}
}
}
/// Create a 32-bit move instruction.
pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
Inst::Mov32 {
rd: to_reg,
rm: from_reg,
}
}
/// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
/// logical immediate, or constant pool).
pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
@ -1312,22 +1323,25 @@ impl Inst {
}
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if value == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size8x8
size: VectorSize::Size32x2
}]
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(RegClass::I64, I32);
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);
insts.push(Inst::MovToFpu {
@ -1341,18 +1355,21 @@ impl Inst {
}
/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);
insts.push(Inst::MovToFpu {
@ -1368,7 +1385,7 @@ impl Inst {
}
/// Create instructions that load a 128-bit vector constant.
pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u128,
alloc_tmp: F,
@ -1416,15 +1433,24 @@ impl Inst {
r
}
/// Create instructions that load a 128-bit vector constant consisting of elements with
/// Create instructions that load a vector constant consisting of elements with
/// the same value.
pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
pattern: u64,
size: VectorSize,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 5]> {
let lane_size = size.lane_size();
let widen_32_bit_pattern = |pattern, lane_size| {
if lane_size == ScalarSize::Size32 {
let pattern = pattern as u32 as u64;
ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
} else {
None
}
};
if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupImm {
@ -1443,8 +1469,29 @@ impl Inst {
invert: true,
size
}]
} else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
let mut insts = smallvec![Inst::VecDupImm {
rd,
imm,
invert: false,
size: VectorSize::Size64x2,
}];
// TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
// lower 64 bits instead.
if !size.is_128bits() {
insts.push(Inst::FpuExtend {
rd,
rn: rd.to_reg(),
size: ScalarSize::Size64,
});
}
insts
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupFPImm { rd, imm, size }]
} else {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
insts.push(Inst::VecDup {
@ -1558,6 +1605,17 @@ impl Inst {
}
}
}
/// Generate a LoadAddr instruction (load address of an amode into
/// register). Elides when possible (when amode is just a register). Returns
/// destination register: either `rd` or a register directly from the amode.
pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
if let Some(r) = mem.is_reg() {
(r, None)
} else {
(rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
}
}
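// Editorial sketch (not part of the patch): a typical caller, assuming a
// scratch register `tmp` and some amode `mem`:
//
//     let (addr, inst) = Inst::gen_load_addr(tmp, mem);
//     if let Some(inst) = inst {
//         inst.emit(sink, emit_info, state);
//     }
//     // `addr` is `tmp` only when materialization was actually needed.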
}
//=============================================================================
@ -1691,7 +1749,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_use(rn);
collector.add_use(rm);
}
&Inst::CSet { rd, .. } => {
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
collector.add_def(rd);
}
&Inst::CCmpImm { rn, .. } => {
@ -1704,7 +1762,12 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(writable_xreg(27));
collector.add_def(writable_xreg(28));
}
&Inst::AtomicCAS { .. } => {
&Inst::AtomicCAS { rs, rt, rn, .. } => {
collector.add_mod(rs);
collector.add_use(rt);
collector.add_use(rn);
}
&Inst::AtomicCASLoop { .. } => {
collector.add_use(xreg(25));
collector.add_use(xreg(26));
collector.add_use(xreg(28));
@ -1732,6 +1795,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuExtend { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuRR { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
@ -1881,6 +1948,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecDupFPImm { rd, .. } => {
collector.add_def(rd);
}
&Inst::VecDupImm { rd, .. } => {
collector.add_def(rd);
}
@ -1971,6 +2041,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
memarg_regs(mem, collector);
}
&Inst::VirtualSPOffsetAdj { .. } => {}
&Inst::ValueLabelMarker { reg, .. } => {
collector.add_use(reg);
}
&Inst::Unwind { .. } => {}
&Inst::EmitIsland { .. } => {}
}
}
@ -2259,7 +2333,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, rn);
map_use(mapper, rm);
}
&mut Inst::CSet { ref mut rd, .. } => {
&mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => {
map_def(mapper, rd);
}
&mut Inst::CCmpImm { ref mut rn, .. } => {
@ -2268,7 +2342,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
&mut Inst::AtomicRMW { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicCAS { .. } => {
&mut Inst::AtomicCAS {
ref mut rs,
ref mut rt,
ref mut rn,
..
} => {
map_mod(mapper, rs);
map_use(mapper, rt);
map_use(mapper, rn);
}
&mut Inst::AtomicCASLoop { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicLoad {
@ -2310,6 +2394,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::FpuExtend {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::FpuRR {
ref mut rd,
ref mut rn,
@ -2593,6 +2685,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecDupFPImm { ref mut rd, .. } => {
map_def(mapper, rd);
}
&mut Inst::VecDupImm { ref mut rd, .. } => {
map_def(mapper, rd);
}
@ -2710,6 +2805,10 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
}
&mut Inst::VirtualSPOffsetAdj { .. } => {}
&mut Inst::EmitIsland { .. } => {}
&mut Inst::ValueLabelMarker { ref mut reg, .. } => {
map_use(mapper, reg);
}
&mut Inst::Unwind { .. } => {}
}
}
@ -2778,16 +2877,43 @@ impl MachInst for Inst {
}
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
assert!(ty.bits() <= 128);
Inst::mov(to_reg, from_reg)
let bits = ty.bits();
assert!(bits <= 128);
assert!(to_reg.to_reg().get_class() == from_reg.get_class());
if from_reg.get_class() == RegClass::I64 {
Inst::Mov64 {
rd: to_reg,
rm: from_reg,
}
} else if from_reg.get_class() == RegClass::V128 {
if bits > 64 {
Inst::FpuMove128 {
rd: to_reg,
rn: from_reg,
}
} else {
Inst::FpuMove64 {
rd: to_reg,
rn: from_reg,
}
}
} else {
panic!("Unexpected register class: {:?}", from_reg.get_class());
}
}
fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
to_reg: Writable<Reg>,
value: u64,
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
to_regs: ValueRegs<Writable<Reg>>,
value: u128,
ty: Type,
alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported yet");
let value = value as u64;
if ty == F64 {
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
} else if ty == F32 {
@ -2811,11 +2937,10 @@ impl MachInst for Inst {
}
}
fn gen_zero_len_nop() -> Inst {
Inst::Nop0
}
fn gen_nop(preferred_size: usize) -> Inst {
if preferred_size == 0 {
return Inst::Nop0;
}
// We can't give a NOP (or any insn) < 4 bytes.
assert!(preferred_size >= 4);
Inst::Nop4
@ -2825,14 +2950,28 @@ impl MachInst for Inst {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
Ok(RegClass::V128)
I8 => Ok((&[RegClass::I64], &[I8])),
I16 => Ok((&[RegClass::I64], &[I16])),
I32 => Ok((&[RegClass::I64], &[I32])),
I64 => Ok((&[RegClass::I64], &[I64])),
B1 => Ok((&[RegClass::I64], &[B1])),
B8 => Ok((&[RegClass::I64], &[B8])),
B16 => Ok((&[RegClass::I64], &[B16])),
B32 => Ok((&[RegClass::I64], &[B32])),
B64 => Ok((&[RegClass::I64], &[B64])),
R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
R64 => Ok((&[RegClass::I64], &[R64])),
F32 => Ok((&[RegClass::V128], &[F32])),
F64 => Ok((&[RegClass::V128], &[F64])),
I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])),
B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])),
_ if ty.is_vector() => {
assert!(ty.bits() <= 128);
Ok((&[RegClass::V128], &[I8X16]))
}
IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
@ -2864,6 +3003,17 @@ impl MachInst for Inst {
fn ref_type_regclass(_: &settings::Flags) -> RegClass {
RegClass::I64
}
fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
Inst::ValueLabelMarker { label, reg }
}
fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
match self {
Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
_ => None,
}
}
}
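// Editorial sketch (not part of the patch): the marker round-trips through
// the two new hooks, which is all the debug-info pass needs:
//
//     let inst = Inst::gen_value_label_marker(label, reg);
//     assert_eq!(inst.defines_value_label(), Some((label, reg)));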
//=============================================================================
@ -3039,41 +3189,13 @@ impl Inst {
let rn = show_ireg_sized(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::ULoad8 {
rd,
ref mem,
..
}
| &Inst::SLoad8 {
rd,
ref mem,
..
}
| &Inst::ULoad16 {
rd,
ref mem,
..
}
| &Inst::SLoad16 {
rd,
ref mem,
..
}
| &Inst::ULoad32 {
rd,
ref mem,
..
}
| &Inst::SLoad32 {
rd,
ref mem,
..
}
| &Inst::ULoad64 {
rd,
ref mem,
..
} => {
&Inst::ULoad8 { rd, ref mem, .. }
| &Inst::SLoad8 { rd, ref mem, .. }
| &Inst::ULoad16 { rd, ref mem, .. }
| &Inst::SLoad16 { rd, ref mem, .. }
| &Inst::ULoad32 { rd, ref mem, .. }
| &Inst::SLoad32 { rd, ref mem, .. }
| &Inst::ULoad64 { rd, ref mem, .. } => {
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
let is_unscaled = match &mem {
@ -3101,26 +3223,10 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}", mem_str, op, rd, mem)
}
&Inst::Store8 {
rd,
ref mem,
..
}
| &Inst::Store16 {
rd,
ref mem,
..
}
| &Inst::Store32 {
rd,
ref mem,
..
}
| &Inst::Store64 {
rd,
ref mem,
..
} => {
&Inst::Store8 { rd, ref mem, .. }
| &Inst::Store16 { rd, ref mem, .. }
| &Inst::Store32 { rd, ref mem, .. }
| &Inst::Store64 { rd, ref mem, .. } => {
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
let is_unscaled = match &mem {
@ -3142,13 +3248,17 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}", mem_str, op, rd, mem)
}
&Inst::StoreP64 { rt, rt2, ref mem, .. } => {
&Inst::StoreP64 {
rt, rt2, ref mem, ..
} => {
let rt = rt.show_rru(mb_rru);
let rt2 = rt2.show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadP64 { rt, rt2, ref mem, .. } => {
&Inst::LoadP64 {
rt, rt2, ref mem, ..
} => {
let rt = rt.to_reg().show_rru(mb_rru);
let rt2 = rt2.to_reg().show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
@ -3191,6 +3301,11 @@ impl Inst {
let cond = cond.show_rru(mb_rru);
format!("cset {}, {}", rd, cond)
}
&Inst::CSetm { rd, cond } => {
let rd = rd.to_reg().show_rru(mb_rru);
let cond = cond.show_rru(mb_rru);
format!("csetm {}, {}", rd, cond)
}
&Inst::CCmpImm {
size,
rn,
@ -3209,27 +3324,52 @@ impl Inst {
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
ty.bits(), op)
}
&Inst::AtomicCAS { ty, .. } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let op = match ty {
I8 => "casalb",
I16 => "casalh",
I32 | I64 => "casal",
_ => panic!("Unsupported type: {}", ty),
};
let size = OperandSize::from_ty(ty);
let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
let rt = show_ireg_sized(rt, mb_rru, size);
let rn = rn.show_rru(mb_rru);
format!("{} {}, {}, [{}]", op, rs, rt, rn)
}
&Inst::AtomicCASLoop { ty } => {
format!(
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
ty.bits())
}
&Inst::AtomicLoad { ty, r_data, r_addr, .. } => {
&Inst::AtomicLoad {
ty, r_data, r_addr, ..
} => {
format!(
"atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru))
r_data.show_rru(mb_rru),
ty.bits(),
r_addr.show_rru(mb_rru)
)
}
&Inst::AtomicStore { ty, r_data, r_addr, .. } => {
&Inst::AtomicStore {
ty, r_data, r_addr, ..
} => {
format!(
"atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru))
"atomically {{ {}_bits_at[{}] = {} }}",
ty.bits(),
r_addr.show_rru(mb_rru),
r_data.show_rru(mb_rru)
)
}
&Inst::Fence {} => {
format!("dmb ish")
}
&Inst::FpuMove64 { rd, rn } => {
let rd = rd.to_reg().show_rru(mb_rru);
let rn = rn.show_rru(mb_rru);
format!("mov {}.8b, {}.8b", rd, rn)
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMove128 { rd, rn } => {
let rd = rd.to_reg().show_rru(mb_rru);
@ -3241,6 +3381,12 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::FpuExtend { rd, rn, size } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
let rn = show_vreg_scalar(rn, mb_rru, size);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let (op, sizesrc, sizedest) = match fpu_op {
FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
@ -3364,7 +3510,11 @@ impl Inst {
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
format!(
"ldr {}, pc+8 ; b 12 ; data.f64 {}",
rd,
f64::from_bits(const_data)
)
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
@ -3473,31 +3623,67 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, 0, size);
format!("dup {}, {}", rd, rn)
}
&Inst::VecDupImm { rd, imm, invert, size } => {
&Inst::VecDupFPImm { rd, imm, size } => {
let imm = imm.show_rru(mb_rru);
let op = if invert {
"mvni"
} else {
"movi"
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
format!("fmov {}, {}", rd, imm)
}
&Inst::VecDupImm {
rd,
imm,
invert,
size,
} => {
let imm = imm.show_rru(mb_rru);
let op = if invert { "mvni" } else { "movi" };
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
format!("{} {}, {}", op, rd, imm)
}
&Inst::VecExtend { t, rd, rn, high_half } => {
&Inst::VecExtend {
t,
rd,
rn,
high_half,
} => {
let (op, dest, src) = match (t, high_half) {
(VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
(VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
(VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
(VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
(VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
(VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
(VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecExtendOp::Sxtl8, false) => {
("sxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Sxtl8, true) => {
("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Sxtl16, false) => {
("sxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Sxtl16, true) => {
("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Sxtl32, false) => {
("sxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Sxtl32, true) => {
("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
(VecExtendOp::Uxtl8, false) => {
("uxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Uxtl8, true) => {
("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Uxtl16, false) => {
("uxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Uxtl16, true) => {
("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Uxtl32, false) => {
("uxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Uxtl32, true) => {
("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
@ -3514,7 +3700,13 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, src_idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
&Inst::VecMiscNarrow {
op,
rd,
rn,
size,
high_half,
} => {
let dest_size = if high_half {
assert!(size.is_128bits());
size
@ -3583,11 +3775,11 @@ impl Inst {
};
let rd_size = match alu_op {
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
_ => size
_ => size,
};
let rn_size = match alu_op {
VecALUOp::Smull => size.halve(),
_ => size
_ => size,
};
let rm_size = rn_size;
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
@ -3628,6 +3820,7 @@ impl Inst {
VecMisc2::Frintz => ("frintz", size),
VecMisc2::Frintm => ("frintm", size),
VecMisc2::Frintp => ("frintp", size),
VecMisc2::Cnt => ("cnt", size),
};
let rd_size = if is_shll { size.widen() } else { size };
@ -3645,7 +3838,13 @@ impl Inst {
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecShiftImm { op, rd, rn, size, imm } => {
&Inst::VecShiftImm {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmOp::Shl => "shl",
VecShiftImmOp::Ushr => "ushr",
@ -3698,7 +3897,10 @@ impl Inst {
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
let cond = cond.show_rru(mb_rru);
format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
format!(
"vcsel {}, {}, {}, {} (if-then-else diamond)",
rd, rn, rm, cond
)
}
&Inst::MovToNZCV { rn } => {
let rn = rn.show_rru(mb_rru);
@ -3711,63 +3913,60 @@ impl Inst {
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits >= 8 => {
// Is the destination a 32-bit register? Corresponds to whether
// extend-to width is <= 32 bits, *unless* we have an unsigned
// 32-to-64-bit extension, which is implemented with a "mov" to a
// 32-bit (W-reg) dest, because this zeroes the top 32 bits.
let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
OperandSize::Size32
} else {
OperandSize::from_bits(to_bits)
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
let op = match (signed, from_bits, to_bits) {
(false, 8, 32) => "uxtb",
(true, 8, 32) => "sxtb",
(false, 16, 32) => "uxth",
(true, 16, 32) => "sxth",
(false, 8, 64) => "uxtb",
(true, 8, 64) => "sxtb",
(false, 16, 64) => "uxth",
(true, 16, 64) => "sxth",
(false, 32, 64) => "mov", // special case (see above).
(true, 32, 64) => "sxtw",
_ => panic!("Unsupported Extend case: {:?}", self),
};
format!("{} {}, {}", op, rd, rn)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && signed => {
let dest_size = OperandSize::from_bits(to_bits);
let zr = if dest_size.is32() { "wzr" } else { "xzr" };
let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
signed: false,
from_bits: 1,
..
} if from_bits == 1 && !signed => {
} => {
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("and {}, {}, #1", rd, rn)
}
&Inst::Extend { .. } => {
panic!("Unsupported Extend case");
&Inst::Extend {
rd,
rn,
signed: false,
from_bits: 32,
to_bits: 64,
} => {
// The case of a zero extension from 32 to 64 bits is implemented
// with a "mov" to a 32-bit (W-reg) dest, because this zeroes
// the top 32 bits.
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("mov {}, {}", rd, rn)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} => {
assert!(from_bits <= to_bits);
let op = match (signed, from_bits) {
(false, 8) => "uxtb",
(true, 8) => "sxtb",
(false, 16) => "uxth",
(true, 16) => "sxth",
(true, 32) => "sxtw",
(true, _) => "sbfx",
(false, _) => "ubfx",
};
if op == "sbfx" || op == "ubfx" {
let dest_size = OperandSize::from_bits(to_bits);
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, dest_size);
format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits)
} else {
let dest_size = if signed {
OperandSize::from_bits(to_bits)
} else {
OperandSize::Size32
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
format!("{} {}, {}", op, rd, rn)
}
}
&Inst::Call { .. } => format!("bl 0"),
&Inst::CallInd { ref info, .. } => {
@ -3878,9 +4077,12 @@ impl Inst {
for inst in mem_insts.into_iter() {
ret.push_str(&inst.show_rru(mb_rru));
}
let (reg, offset) = match mem {
AMode::Unscaled(r, simm9) => (r, simm9.value()),
AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
let (reg, index_reg, offset) = match mem {
AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
AMode::UnsignedOffset(r, uimm12scaled) => {
(r, None, uimm12scaled.value() as i32)
}
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
@ -3894,8 +4096,18 @@ impl Inst {
ALUOp::Add64
};
if offset == 0 {
let mov = Inst::mov(rd, reg);
if let Some((idx, extendop)) = index_reg {
let add = Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd,
rn: reg,
rm: idx,
extendop,
};
ret.push_str(&add.show_rru(mb_rru));
} else if offset == 0 {
let mov = Inst::gen_move(rd, reg, I64);
ret.push_str(&mov.show_rru(mb_rru));
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
@ -3925,6 +4137,14 @@ impl Inst {
format!("virtual_sp_offset_adjust {}", offset)
}
&Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
&Inst::ValueLabelMarker { label, reg } => {
format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
}
&Inst::Unwind { ref inst } => {
format!("unwind {:?}", inst)
}
}
}
}

View file

@ -1,201 +1,2 @@
use super::*;
use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst};
use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
use crate::machinst::UnwindInfoContext;
use crate::result::CodegenResult;
use alloc::vec::Vec;
use regalloc::Reg;
#[cfg(feature = "unwind")]
pub(crate) mod systemv;
pub struct AArch64UnwindInfo;
impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo {
fn create_unwind_info(
context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
let word_size = 8u8;
let pair_size = word_size * 2;
let mut codes = Vec::new();
for i in context.prologue.clone() {
let i = i as usize;
let inst = &context.insts[i];
let offset = context.insts_layout[i];
match inst {
Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(rn, imm7),
..
} if *rt == regs::fp_reg()
&& *rt2 == regs::link_reg()
&& *rn == regs::writable_stack_reg()
&& imm7.value == -(pair_size as i16) =>
{
// stp fp (x29), lr (x30), [sp, #-16]!
codes.push((
offset,
UnwindCode::StackAlloc {
size: pair_size as u32,
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt,
stack_offset: 0,
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt2,
stack_offset: word_size as u32,
},
));
}
Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(rn, imm7),
..
} if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => {
// stp r1, r2, [sp, #(i * #16)]
let stack_offset = imm7.value as u32;
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt,
stack_offset,
},
));
if *rt2 != regs::zero_reg() {
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt2,
stack_offset: stack_offset + word_size as u32,
},
));
}
}
Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd,
rn,
imm12:
Imm12 {
bits: 0,
shift12: false,
},
} if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => {
// mov fp (x29), sp.
codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() }));
}
Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => {
codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 }));
}
_ => {}
}
}
// TODO epilogues
let prologue_size = if context.prologue.is_empty() {
0
} else {
context.insts_layout[context.prologue.end as usize - 1]
};
Ok(Some(UnwindInfo {
prologue_size,
prologue_unwind_codes: codes,
epilogues_unwind_codes: vec![],
function_size: context.len,
word_size,
initial_sp_offset: 0,
}))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let result = context.mach_compile_result.unwrap();
let unwind_info = result.unwind_info.unwrap();
assert_eq!(
unwind_info,
UnwindInfo {
prologue_size: 12,
prologue_unwind_codes: vec![
(4, UnwindCode::StackAlloc { size: 16 }),
(
4,
UnwindCode::SaveRegister {
reg: regs::fp_reg(),
stack_offset: 0
}
),
(
4,
UnwindCode::SaveRegister {
reg: regs::link_reg(),
stack_offset: 8
}
),
(
8,
UnwindCode::SetFramePointer {
reg: regs::fp_reg()
}
)
],
epilogues_unwind_codes: vec![],
function_size: 24,
word_size: 8,
initial_sp_offset: 0,
}
);
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
}

View file

@ -1,9 +1,7 @@
//! Unwind information for System V ABI (Aarch64).
use crate::isa::aarch64::inst::regs;
use crate::isa::unwind::input;
use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
use crate::result::CodegenResult;
use crate::isa::unwind::systemv::RegisterMappingError;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
use regalloc::{Reg, RegClass};
@ -31,128 +29,40 @@ pub fn create_cie() -> CommonInformationEntry {
/// Map Cranelift registers to their corresponding Gimli registers.
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
// For AArch64 DWARF register mappings, see:
//
// https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
//
// X0--X31 is 0--31; V0--V31 is 64--95.
match reg.get_class() {
RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())),
RegClass::I64 => {
let reg = reg.get_hw_encoding() as u16;
Ok(Register(reg))
}
RegClass::V128 => {
let reg = reg.get_hw_encoding() as u16;
Ok(Register(64 + reg))
}
_ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
}
}
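// Editorial sketch (not part of the patch): with the AAPCS64 DWARF
// numbering cited above,
//
//     map_reg(regs::xreg(3)) == Ok(Register(3))
//     map_reg(regs::vreg(5)) == Ok(Register(69))   // 64 + 5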
pub(crate) fn create_unwind_info(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Option<UnwindInfo>> {
struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
regs::stack_reg().get_hw_encoding().into()
}
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
let map = RegisterMapper;
Ok(Some(UnwindInfo::build(unwind, &map)?))
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
StackSlotKind,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
fn sp(&self) -> u16 {
regs::stack_reg().get_hw_encoding().into()
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
fn fp(&self) -> u16 {
regs::fp_reg().get_hw_encoding().into()
}
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
fn lr(&self) -> Option<u16> {
Some(regs::link_reg().get_hw_encoding().into())
}
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
fn lr_offset(&self) -> Option<u32> {
Some(8)
}
}
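// Editor's note (derived from the FDE strings asserted in the tests above):
// after `stp x29, x30, [sp, #-16]!` the CFA sits 16 bytes above the new SP,
// so this mapper yields FP (x29 = DWARF reg 29) saved at CFA-16 and, since
// `lr_offset()` is 8, LR (x30 = DWARF reg 30) at CFA-8 -- the
// `Offset(Register(29), -16)` / `Offset(Register(30), -8)` pair in the FDEs.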


@ -22,7 +22,7 @@ use super::lower_inst;
use crate::data_value::DataValue;
use log::{debug, trace};
use regalloc::{Reg, RegClass, Writable};
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
//============================================================================
@ -111,7 +111,7 @@ pub(crate) enum ResultRegImmShift {
/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
let input = ctx.get_input(input.insn, input.input);
let input = ctx.get_input_as_source_or_const(input.insn, input.input);
input.constant
}
@ -171,7 +171,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
debug!("put_input_in_reg: input {:?}", input);
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty_bits(ty) as u8;
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let masked = if from_bits < 64 {
@ -179,9 +179,9 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
} else {
c
};
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
@ -189,14 +189,15 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
ctx.put_input_in_regs(input.insn, input.input)
.only_reg()
.unwrap()
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -207,7 +208,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -224,7 +225,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -236,7 +237,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -272,7 +273,7 @@ fn put_input_in_rs<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRS {
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
@ -305,7 +306,7 @@ fn put_input_in_rse<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSE {
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
let out_ty = ctx.output_ty(insn, 0);
@ -697,7 +698,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
/* addends64.len() == 0 */
{
if addends32.len() > 0 {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg1, extendop) = addends32.pop().unwrap();
let signed = match extendop {
ExtendOp::SXTW => true,
@ -719,7 +720,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
} else
/* addends32.len() == 0 */
{
let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
lower_constant_u64(ctx, off_reg, offset as u64);
offset = 0;
AMode::reg(off_reg.to_reg())
@ -735,7 +736,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}
// Allocate the temp and shoehorn it into the AMode.
let addr = ctx.alloc_tmp(RegClass::I64, I64);
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg, memarg) = match memarg {
AMode::RegExtended(r1, r2, extendop) => {
(r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
@ -783,7 +784,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// If the register is the stack reg, we must move it to another reg
// before adding it.
let reg = if reg == stack_reg() {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
tmp.to_reg()
} else {
@ -825,7 +826,7 @@ pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f32,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
@ -837,7 +838,7 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f64,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
@ -854,12 +855,12 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
// is potentially expensive.
ctx.emit(Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size8),
invert: false,
size: VectorSize::Size8x16,
});
} else {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
@ -886,7 +887,7 @@ pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
),
None => (value, size),
};
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
ctx.emit(inst);
@ -1052,7 +1053,7 @@ pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input(input.insn, input.input);
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
debug!(
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
input, inputs, op
@ -1092,14 +1093,14 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
op: Opcode,
conv: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input(input.insn, input.input);
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
if data.opcode() == conv {
let inputs = c.get_input(src_inst, 0);
let inputs = c.get_input_as_source_or_const(src_inst, 0);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
@ -1152,24 +1153,77 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
}
}
/// Convert a 0 / 1 result, such as from a conditional-set instruction, into a 0
/// / -1 (all-ones) result as expected for bool operations.
pub(crate) fn normalize_bool_result<C: LowerCtx<I = Inst>>(
/// Materialize a boolean value into a register from the flags
/// (e.g., set by a comparison), producing a 0 / -1 (all-ones)
/// result as expected for bool operations.
pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
rd: Writable<Reg>,
cond: Cond,
) {
// A boolean is 0 / -1; if output width is > 1, negate.
// A boolean is 0 / -1; if output width is > 1 use `csetm`,
// otherwise use `cset`.
if ty_bits(ctx.output_ty(insn, 0)) > 1 {
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd,
rn: zero_reg(),
rm: rd.to_reg(),
});
ctx.emit(Inst::CSetm { rd, cond });
} else {
ctx.emit(Inst::CSet { rd, cond });
}
}
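// Editor's note (illustrative): in AArch64 assembly terms the two shapes
// this helper emits are
//
//     cset  w0, <cond>    // b1 result: true -> 1
//     csetm w0, <cond>    // wider bool: true -> -1 (all ones)
//
// so a true `b32` comes out as 0xFFFF_FFFF, matching the 0 / -1 convention.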
/// This is target-word-size dependent, and it excludes booleans and reftypes.
pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
match ty {
I8 | I16 | I32 | I64 => true,
_ => false,
}
}
fn load_op_to_ty(op: Opcode) -> Option<Type> {
match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8),
Opcode::Sload16 | Opcode::Uload16 | Opcode::Sload16Complex | Opcode::Uload16Complex => {
Some(I16)
}
Opcode::Sload32 | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => {
Some(I32)
}
Opcode::Load | Opcode::LoadComplex => None,
Opcode::Sload8x8 | Opcode::Uload8x8 | Opcode::Sload8x8Complex | Opcode::Uload8x8Complex => {
Some(I8X8)
}
Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload16x4Complex => Some(I16X4),
Opcode::Sload32x2
| Opcode::Uload32x2
| Opcode::Sload32x2Complex
| Opcode::Uload32x2Complex => Some(I32X2),
_ => None,
}
}
/// Helper to lower a load instruction; this is used in several places, because
/// a load can sometimes be merged into another operation.
pub(crate) fn lower_load<C: LowerCtx<I = Inst>, F: FnMut(&mut C, Writable<Reg>, Type, AMode)>(
ctx: &mut C,
ir_inst: IRInst,
inputs: &[InsnInput],
output: InsnOutput,
mut f: F,
) {
let op = ctx.data(ir_inst).opcode();
let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));
let off = ctx.data(ir_inst).load_store_offset().unwrap();
let mem = lower_address(ctx, elem_ty, &inputs[..], off);
let rd = get_output_reg(ctx, output).only_reg().unwrap();
f(ctx, rd, elem_ty, mem);
}
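// Editor's sketch of a call site (hedged: `Inst::ULoad32` and the exact
// field names stand in for whatever the lowering rule actually selects).
// A rule for a 32-bit unsigned load might call:
//
//     lower_load(ctx, insn, &inputs[..], outputs[0], |ctx, rd, _ty, mem| {
//         ctx.emit(Inst::ULoad32 { rd, mem, flags: MemFlags::trusted() });
//     });
//
// The helper resolves the element type and address mode once, so each rule
// only supplies the final instruction.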
//=============================================================================
// Lowering-backend trait implementation.
@ -1177,7 +1231,7 @@ impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
@ -1185,9 +1239,8 @@ impl LowerBackend for AArch64Backend {
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
lower_inst::lower_branch(ctx, branches, targets)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {

(Diff for this file not shown because of its large size.)


@ -2,13 +2,13 @@
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::aarch64::settings as aarch64_settings;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use crate::settings as shared_settings;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
@ -17,6 +17,7 @@ mod abi;
pub(crate) mod inst;
mod lower;
mod lower_inst;
mod settings;
use inst::create_reg_universe;
@ -25,17 +26,23 @@ use self::inst::EmitInfo;
/// An AArch64 backend.
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
reg_universe: RealRegUniverse,
}
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
pub fn new_with_flags(
triple: Triple,
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
) -> AArch64Backend {
let reg_universe = create_reg_universe(&flags);
AArch64Backend {
triple,
flags,
isa_flags,
reg_universe,
}
}
@ -45,7 +52,7 @@ impl AArch64Backend {
fn compile_vcode(
&self,
func: &Function,
flags: settings::Flags,
flags: shared_settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> {
let emit_info = EmitInfo::new(flags.clone());
let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);
@ -64,7 +71,7 @@ impl MachBackend for AArch64Backend {
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let unwind_info = vcode.unwind_info()?;
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe(flags))))
@ -78,7 +85,8 @@ impl MachBackend for AArch64Backend {
buffer,
frame_size,
disasm,
unwind_info,
value_labels_ranges: Default::default(),
stackslot_offsets,
})
}
@ -90,10 +98,19 @@ impl MachBackend for AArch64Backend {
self.triple.clone()
}
fn flags(&self) -> &settings::Flags {
fn flags(&self) -> &shared_settings::Flags {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);
}
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}
@ -119,11 +136,18 @@ impl MachBackend for AArch64Backend {
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
Ok(match (result.unwind_info.as_ref(), kind) {
(Some(info), UnwindInfoKind::SystemV) => {
inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data.len(),
&mapper,
)?,
))
}
(Some(_info), UnwindInfoKind::Windows) => {
UnwindInfoKind::Windows => {
// TODO: support Windows unwind info on AArch64
None
}
@ -142,9 +166,10 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple, shared_flags, _| {
let backend = AArch64Backend::new_with_flags(triple, shared_flags);
setup: aarch64_settings::builder(),
constructor: |triple, shared_flags, builder| {
let isa_flags = aarch64_settings::Flags::new(&shared_flags, builder);
let backend = AArch64Backend::new_with_flags(triple, shared_flags, isa_flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
@ -179,11 +204,14 @@ mod test {
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
shared_flags,
isa_flags,
);
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
@ -192,12 +220,11 @@ mod test {
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
0x01, 0x0b, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden[..]);
@ -234,11 +261,14 @@ mod test {
let v3 = pos.ins().isub(v1, v0);
pos.ins().return_(&[v3]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
@ -259,14 +289,13 @@ mod test {
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
193, 168, 192, 3, 95, 214,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 253, 123, 193, 168, 192, 3,
95, 214,
];
assert_eq!(code, &golden[..]);


@ -0,0 +1,9 @@
//! AArch64 Settings.
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/settings.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));


@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
/// Support for the ARM ABI from the callee side (within a function body).
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
fn compute_arg_locs(
_call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@ -81,7 +82,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
if next_rreg < max_reg_val {
let reg = rreg(next_rreg);
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
@ -101,7 +102,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_rreg < max_reg_val {
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
rreg(next_rreg).to_real_reg(),
I32,
ir::ArgumentExtension::None,
@ -124,7 +125,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
let max_stack = next_stack;
for (ty, ext, purpose) in stack_args.into_iter().rev() {
next_stack -= 4;
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
(max_stack - next_stack) as i64,
ty,
ext,
@ -185,7 +186,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::EpiloguePlaceholder
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
if let Some(imm12) = UImm12::maybe_from_i64(imm as i64) {
@ -209,7 +210,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
insts
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::Cmp {
rn: sp_reg(),
@ -243,7 +244,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::gen_store(from_reg, mem, ty)
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
if amount == 0 {
@ -283,7 +284,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::VirtualSPOffsetAdj { offset }
}
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
fn gen_prologue_frame_setup(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
let reg_list = vec![fp_reg(), lr_reg()];
ret.push(Inst::Push { reg_list });
@ -294,7 +295,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
ret.push(Inst::Mov {
rd: writable_sp_reg(),
@ -305,6 +306,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
// TODO: implement if we ever require stack probes on ARM32 (unlikely
// unless Lucet is ported)
smallvec![]
}
/// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that.
fn gen_clobber_save(
@ -312,7 +319,6 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
if fixed_frame_storage_size > 0 {
@ -342,7 +348,6 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_vec = get_callee_saves(clobbers);
@ -420,6 +425,15 @@ impl ABIMachineSpec for Arm32MachineDeps {
insts
}
fn gen_memcpy(
_call_conv: isa::CallConv,
_dst: Reg,
_src: Reg,
_size: usize,
) -> SmallVec<[Self::I; 8]> {
unimplemented!("StructArgs not implemented for ARM32 yet");
}
fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
match rc {
RegClass::I32 => 1,
@ -445,6 +459,13 @@ impl ABIMachineSpec for Arm32MachineDeps {
}
caller_saved
}
fn get_ext_mode(
_call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
specified
}
}
fn is_callee_save(r: RealReg) -> bool {


@ -286,7 +286,6 @@ impl MachInstEmitInfo for EmitInfo {
impl MachInstEmit for Inst {
type Info = EmitInfo;
type State = EmitState;
type UnwindInfo = super::unwind::Arm32UnwindInfo;
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
let start_off = sink.cur_offset();


@ -22,7 +22,6 @@ mod emit;
pub use self::emit::*;
mod regs;
pub use self::regs::*;
pub mod unwind;
#[cfg(test)]
mod emit_tests;
@ -807,12 +806,17 @@ impl MachInst for Inst {
Inst::mov(to_reg, from_reg)
}
fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
to_reg: Writable<Reg>,
value: u64,
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
to_regs: ValueRegs<Writable<Reg>>,
value: u128,
ty: Type,
_alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported yet");
let value = value as u64;
match ty {
B1 | I8 | B8 | I16 | B16 | I32 | B32 => {
let v: i64 = value as i64;
@ -826,11 +830,10 @@ impl MachInst for Inst {
}
}
fn gen_zero_len_nop() -> Inst {
Inst::Nop0
}
fn gen_nop(preferred_size: usize) -> Inst {
if preferred_size == 0 {
return Inst::Nop0;
}
assert!(preferred_size >= 2);
Inst::Nop2
}
@ -839,10 +842,10 @@ impl MachInst for Inst {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 | I16 | I32 | B1 | B8 | B16 | B32 => Ok(RegClass::I32),
IFLAGS => Ok(RegClass::I32),
I8 | I16 | I32 | B1 | B8 | B16 | B32 => Ok((&[RegClass::I32], &[I32])),
IFLAGS => Ok((&[RegClass::I32], &[I32])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty


@ -1,14 +0,0 @@
use super::*;
use crate::isa::unwind::input::UnwindInfo;
use crate::result::CodegenResult;
pub struct Arm32UnwindInfo;
impl UnwindInfoGenerator<Inst> for Arm32UnwindInfo {
fn create_unwind_info(
_context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
// TODO
Ok(None)
}
}


@ -13,7 +13,7 @@ use crate::isa::arm32::Arm32Backend;
use super::lower_inst;
use regalloc::{Reg, RegClass, Writable};
use regalloc::{Reg, Writable};
//============================================================================
// Lowering: convert instruction outputs to result types.
@ -55,7 +55,7 @@ pub(crate) enum NarrowValueMode {
/// Lower an instruction output to a reg.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.get_output(out.insn, out.output)
ctx.get_output(out.insn, out.output).only_reg().unwrap()
}
/// Lower an instruction input to a reg.
@ -68,24 +68,27 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
) -> Reg {
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty.bits() as u8;
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, c, ty, |reg_class, ty| ctx.alloc_tmp(reg_class, ty))
.into_iter()
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::gen_constant(ValueRegs::one(to_reg), c as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
ctx.emit(inst);
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
ctx.put_input_in_regs(input.insn, input.input)
.only_reg()
.unwrap()
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend, 1) => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::AluRRImm8 {
alu_op: ALUOp::And,
rd: tmp,
@ -95,7 +98,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::ZeroExtend, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rm: in_reg,
@ -105,7 +108,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rm: in_reg,
@ -221,7 +224,7 @@ impl LowerBackend for Arm32Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
@ -229,9 +232,8 @@ impl LowerBackend for Arm32Backend {
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
lower_inst::lower_branch(ctx, branches, targets)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {


@ -5,12 +5,12 @@ use crate::ir::Inst as IRInst;
use crate::ir::Opcode;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use crate::isa::arm32::abi::*;
use crate::isa::arm32::inst::*;
use regalloc::RegClass;
use smallvec::SmallVec;
use super::lower::*;
@ -19,6 +19,7 @@ use super::lower::*;
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
flags: &Flags,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
@ -143,7 +144,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
// ror rd, rn, 32 - (rm & 31)
ctx.emit(Inst::AluRRImm8 {
@ -171,7 +172,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match ty {
I32 => {
let rd_hi = output_to_reg(ctx, outputs[0]);
let rd_lo = ctx.alloc_tmp(RegClass::I32, ty);
let rd_lo = ctx.alloc_tmp(ty).only_reg().unwrap();
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@ -316,7 +317,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trueif => {
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -344,7 +345,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
// Verification ensures that the input is always a single-def ifcmp.
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -471,7 +472,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trapif => {
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -487,7 +488,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::FallthroughReturn | Opcode::Return => {
for (i, input) in inputs.iter().enumerate() {
let reg = input_to_reg(ctx, *input, NarrowValueMode::None);
let retval_reg = ctx.retval(i);
let retval_reg = ctx.retval(i).only_reg().unwrap();
let ty = ctx.input_ty(insn, i);
ctx.emit(Inst::gen_move(retval_reg, reg, ty));
@ -503,7 +504,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv)?,
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
&inputs[..],
)
}
@ -513,7 +514,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
&inputs[1..],
)
}
@ -522,12 +523,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), abi.num_args());
for (i, input) in inputs.iter().enumerate().filter(|(i, _)| *i <= 3) {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg));
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
let retval_reg = output_to_reg(ctx, *output);
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg));
}
}
_ => panic!("lowering {} unimplemented!", op),
@ -540,7 +541,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
@ -557,11 +557,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
let not_taken = BranchTarget::Label(targets[1]);
match op0 {
Opcode::Brz | Opcode::Brnz => {
let rn = input_to_reg(


@ -7,7 +7,8 @@ use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter,
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Architecture, ArmArchitecture, Triple};
@ -60,6 +61,7 @@ impl MachBackend for Arm32Backend {
let vcode = self.compile_vcode(func, flags.clone())?;
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe())))
@ -73,7 +75,8 @@ impl MachBackend for Arm32Backend {
buffer,
frame_size,
disasm,
unwind_info: None,
value_labels_ranges: Default::default(),
stackslot_offsets,
})
}
@ -89,6 +92,14 @@ impl MachBackend for Arm32Backend {
&self.flags
}
fn isa_flags(&self) -> Vec<settings::Value> {
Vec::new()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
}
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}


@ -10,22 +10,24 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum CallConv {
/// Best performance, not ABI-stable
/// Best performance, not ABI-stable.
Fast,
/// Smallest caller code size, not ABI-stable
/// Smallest caller code size, not ABI-stable.
Cold,
/// System V-style convention used on many platforms
/// System V-style convention used on many platforms.
SystemV,
/// Windows "fastcall" convention, also used for x64 and ARM
/// Windows "fastcall" convention, also used for x64 and ARM.
WindowsFastcall,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV
/// Mac aarch64 calling convention, which is a tweaked aarch64 ABI.
AppleAarch64,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV.
BaldrdashSystemV,
/// SpiderMonkey WebAssembly convention on Windows
/// SpiderMonkey WebAssembly convention on Windows.
BaldrdashWindows,
/// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS
/// register slots in the frame.
Baldrdash2020,
/// Specialized convention for the probestack function
/// Specialized convention for the probestack function.
Probestack,
}
@ -36,6 +38,7 @@ impl CallConv {
// Default to System V for unknown targets because most everything
// uses System V.
Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV,
Ok(CallingConvention::AppleAarch64) => Self::AppleAarch64,
Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall,
Ok(unimp) => unimplemented!("calling convention: {:?}", unimp),
}
@ -49,6 +52,7 @@ impl CallConv {
LibcallCallConv::Cold => Self::Cold,
LibcallCallConv::SystemV => Self::SystemV,
LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
LibcallCallConv::AppleAarch64 => Self::AppleAarch64,
LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020,
@ -80,6 +84,7 @@ impl fmt::Display for CallConv {
Self::Cold => "cold",
Self::SystemV => "system_v",
Self::WindowsFastcall => "windows_fastcall",
Self::AppleAarch64 => "apple_aarch64",
Self::BaldrdashSystemV => "baldrdash_system_v",
Self::BaldrdashWindows => "baldrdash_windows",
Self::Baldrdash2020 => "baldrdash_2020",
@ -96,6 +101,7 @@ impl str::FromStr for CallConv {
"cold" => Ok(Self::Cold),
"system_v" => Ok(Self::SystemV),
"windows_fastcall" => Ok(Self::WindowsFastcall),
"apple_aarch64" => Ok(Self::AppleAarch64),
"baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
"baldrdash_windows" => Ok(Self::BaldrdashWindows),
"baldrdash_2020" => Ok(Self::Baldrdash2020),


@ -6,6 +6,9 @@ use crate::isa::constraints::{BranchRange, RecipeConstraints};
use crate::regalloc::RegDiversions;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
@ -13,6 +16,7 @@ use core::fmt;
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Encoding {
recipe: u16,
bits: u16,


@ -20,7 +20,6 @@
//! appropriate for the requested ISA:
//!
//! ```
//! # extern crate cranelift_codegen;
//! # #[macro_use] extern crate target_lexicon;
//! use cranelift_codegen::isa;
//! use cranelift_codegen::settings::{self, Configurable};
@ -30,12 +29,12 @@
//! let shared_builder = settings::builder();
//! let shared_flags = settings::Flags::new(shared_builder);
//!
//! match isa::lookup(triple!("riscv32")) {
//! match isa::lookup(triple!("x86_64")) {
//! Err(_) => {
//! // The RISC-V target ISA is not available.
//! // The x86_64 target ISA is not available.
//! }
//! Ok(mut isa_builder) => {
//! isa_builder.set("supports_m", "on");
//! isa_builder.set("use_popcnt", "on");
//! let isa = isa_builder.finish(shared_flags);
//! }
//! }
@ -64,21 +63,26 @@ use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::fmt::{Debug, Formatter};
use core::hash::Hasher;
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
use thiserror::Error;
#[cfg(feature = "riscv")]
mod riscv;
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
// included whenever building with x86 support. The new backend is the default,
// but the old can be requested with `BackendVariant::Legacy`. However, if this
// crate is built with the `old-x86-backend` feature, then the old backend is
// the default instead.
#[cfg(feature = "x86")]
mod x86;
#[cfg(feature = "x64")]
#[cfg(feature = "x86")]
mod x64;
#[cfg(feature = "arm32")]
@ -102,36 +106,68 @@ mod test_utils;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
($name: ident, $feature: tt, $triple: ident) => {{
#[cfg(feature = $feature)]
($name: ident, $cfg_terms: tt, $triple: ident) => {{
#[cfg $cfg_terms]
{
Ok($name::isa_builder($triple))
}
#[cfg(not(feature = $feature))]
#[cfg(not $cfg_terms)]
{
Err(LookupError::SupportDisabled)
}
}};
}
/// The "variant" for a given target. On one platform (x86-64), we have two
/// backends, the "old" and "new" one; the new one is the default if included
/// in the build configuration and not otherwise specified.
#[derive(Clone, Copy, Debug)]
pub enum BackendVariant {
/// Any backend available.
Any,
/// A "legacy" backend: one that operates using legalizations and encodings.
Legacy,
/// A backend built on `MachInst`s and the `VCode` framework.
MachInst,
}
impl Default for BackendVariant {
fn default() -> Self {
BackendVariant::Any
}
}
/// Look for an ISA for the given `triple`, selecting the backend variant given
/// by `variant` if available.
pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
match (triple.architecture, variant) {
(Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => {
isa_builder!(riscv, (feature = "riscv"), triple)
}
(Architecture::X86_64, BackendVariant::Legacy) => {
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::X86_64, BackendVariant::MachInst) => {
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(not(feature = "old-x86-backend"))]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(feature = "old-x86-backend")]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
(Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
_ => Err(LookupError::Unsupported),
}
}
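// Editor's sketch (hedged usage; assumes the crate was built with the "x86"
// feature): callers that need the new backend regardless of the build-time
// default can pin it explicitly,
//
//     use cranelift_codegen::isa::{lookup_variant, BackendVariant};
//     use target_lexicon::triple;
//
//     let isa_builder = lookup_variant(triple!("x86_64"), BackendVariant::MachInst)
//         .expect("x86_64 MachInst backend compiled in");
//
// while `lookup()` below keeps the `BackendVariant::Any` behavior.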
/// Look for an ISA for the given `triple`.
/// Return a builder that can create a corresponding `TargetIsa`.
pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
match triple.architecture {
Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
isa_builder!(riscv, "riscv", triple)
}
Architecture::X86_32 { .. } | Architecture::X86_64 => {
if cfg!(feature = "x64") {
isa_builder!(x64, "x64", triple)
} else {
isa_builder!(x86, "x86", triple)
}
}
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
_ => Err(LookupError::Unsupported),
}
lookup_variant(triple, BackendVariant::Any)
}
/// Look for a supported ISA with the given `name`.
@ -163,6 +199,16 @@ pub struct Builder {
}
impl Builder {
/// Gets the triple for the builder.
pub fn triple(&self) -> &Triple {
&self.triple
}
/// Iterates the available settings in the builder.
pub fn iter(&self) -> impl Iterator<Item = settings::Setting> {
self.setup.iter()
}
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
/// fully configured `TargetIsa` trait object.
pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
@ -227,11 +273,31 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Get the ISA-dependent flag values that were used to make this trait object.
fn isa_flags(&self) -> Vec<settings::Value>;
/// Get the variant of this ISA (Legacy or MachInst).
fn variant(&self) -> BackendVariant {
BackendVariant::Legacy
}
/// Hashes all flags, both ISA-independent and ISA-specific, into the
/// specified hasher.
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
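// Editor's sketch (hedged): a consumer holding `isa: &dyn TargetIsa` can
// fingerprint the complete configuration, e.g. for an object-cache key:
//
//     use std::collections::hash_map::DefaultHasher;
//     use std::hash::Hasher;
//
//     let mut h = DefaultHasher::new();
//     isa.hash_all_flags(&mut h);
//     let cache_key = h.finish();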
/// Get the default calling convention of this target.
fn default_call_conv(&self) -> CallConv {
CallConv::triple_default(self.triple())
}
/// Get the endianness of this ISA.
fn endianness(&self) -> ir::Endianness {
match self.triple().endianness().unwrap() {
target_lexicon::Endianness::Little => ir::Endianness::Little,
target_lexicon::Endianness::Big => ir::Endianness::Big,
}
}
/// Get the pointer type of this ISA.
fn pointer_type(&self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
@ -279,6 +345,12 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
Err(RegisterMappingError::UnsupportedArchitecture)
}
#[cfg(feature = "unwind")]
/// Map a regalloc::Reg to its corresponding DWARF register.
fn map_regalloc_reg_to_dwarf(&self, _: ::regalloc::Reg) -> Result<u16, RegisterMappingError> {
Err(RegisterMappingError::UnsupportedArchitecture)
}
/// Returns an iterator over legal encodings for the instruction.
fn legal_encodings<'a>(
&'a self,


@ -15,10 +15,10 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::hash::{Hash, Hasher};
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
@ -69,6 +69,15 @@ impl TargetIsa for Isa {
&self.shared_flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.shared_flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}


@ -1,6 +1,6 @@
//! RISC-V Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a


@ -1,4 +1,7 @@
//! Represents information relating to function unwinding.
use regalloc::RealReg;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
@ -66,6 +69,11 @@ pub mod input {
RememberState,
/// Restores the state.
RestoreState,
/// On aarch64 ARMv8.3+ devices, enables or disables pointer authentication.
Aarch64SetPointerAuth {
/// Whether return addresses (held in LR) contain a pointer-authentication code.
return_addresses: bool,
},
}
/// Unwind information as generated by a backend.
@ -86,3 +94,155 @@ pub mod input {
pub initial_sp_offset: u8,
}
}
/// Unwind pseudoinstruction used in VCode backends: represents that
/// at the present location, an action has just been taken.
///
/// VCode backends always emit unwind info that is relative to a frame
/// pointer, because we are planning to allow for dynamic frame allocation,
/// and because it makes the design quite a lot simpler in general: we don't
/// have to be precise about SP adjustments throughout the body of the function.
///
/// We include only unwind info for prologues at this time. Note that unwind
/// info for epilogues is only necessary if one expects to unwind while within
/// the last few instructions of the function (after FP has been restored) or
/// if one wishes to instruction-step through the epilogue and see a backtrace
/// at every point. This is not necessary for correct operation otherwise and so
/// we simplify the world a bit by omitting epilogue information. (Note that
/// some platforms also don't require or have a way to describe unwind
/// information for epilogues at all: for example, on Windows, the `UNWIND_INFO`
/// format only stores information for the function prologue.)
///
/// Because we are defining an abstraction over multiple unwind formats (at
/// least Windows/fastcall and System V) and multiple architectures (at least
/// x86-64 and aarch64), we have to be a little bit flexible in how we describe
/// the frame. However, it turns out that a least-common-denominator prologue
/// works for all of the cases we have to worry about today!
///
/// We assume the stack looks something like this:
///
///
/// ```plain
/// +----------------------------------------------+
/// | stack arg area, etc (according to ABI) |
/// | ... |
/// SP at call --> +----------------------------------------------+
/// | return address (pushed by HW or SW) |
/// +----------------------------------------------+
/// | old frame pointer (FP) |
/// FP in this --> +----------------------------------------------+
/// function | clobbered callee-save registers |
/// | ... |
/// start of --> +----------------------------------------------+
/// clobbers | (rest of function's frame, irrelevant here) |
/// | ... |
/// SP in this --> +----------------------------------------------+
/// function
/// ```
///
/// We assume that the prologue consists of:
///
/// * `PushFrameRegs`: A push operation that adds the old FP to the stack (and
/// maybe the link register, on architectures that do not push return addresses
/// in hardware)
/// * `DefineNewFrame`: An update that sets FP to SP to establish a new frame
/// * `SaveReg`: A number of stores or pushes to the stack to save clobbered registers
///
/// Each of these steps has a corresponding pseudo-instruction. At each step,
/// we need some information to determine where the current stack frame is
/// relative to SP or FP. When the `PushFrameRegs` occurs, we need to know how
/// much SP was decremented by, so we can allow the unwinder to continue to find
/// the caller's frame. When we define the new frame, we need to know where FP
/// is in relation to "SP at call" and also "start of clobbers", because
/// different unwind formats define one or the other of those as the anchor by
/// which we define the frame. Finally, when registers are saved, we need to
/// know which ones, and where.
///
/// Different unwind formats work differently; here is a whirlwind tour of how
/// they define frames to help understanding:
///
/// - Windows unwind information defines a frame that must start below the
/// clobber area, because all clobber-save offsets are non-negative. We set it
/// at the "start of clobbers" in the figure above. The `UNWIND_INFO` contains
/// a "frame pointer offset" field; when we define the new frame, the frame is
/// understood to be the value of FP (`RBP`) *minus* this offset. In other
/// words, the FP is *at the frame pointer offset* relative to the
/// start-of-clobber-frame. We use the "FP offset down to clobber area" offset
/// to generate this info.
///
/// - System V unwind information defines a frame in terms of the CFA
/// (call-frame address), which is equal to the "SP at call" above. SysV
/// allows negative offsets, so there is no issue defining clobber-save
/// locations in terms of CFA. The format allows us to define CFA flexibly in
/// terms of any register plus an offset; we define it in terms of FP plus
/// the clobber-to-caller-SP offset once FP is established.
///
/// Note that certain architectures impose limits on offsets: for example, on
/// Windows, the base of the clobber area must not be more than 240 bytes below
/// FP.
///
/// Unwind pseudoinstructions are emitted inline by ABI code as it generates
/// a prologue. Thus, for the usual case, a prologue might look like (using x64
/// as an example):
///
/// ```plain
/// push rbp
/// unwind UnwindInst::PushFrameRegs { offset_upward_to_caller_sp: 16 }
/// mov rbp, rsp
/// unwind UnwindInst::DefineNewFrame { offset_upward_to_caller_sp: 16,
/// offset_downward_to_clobbers: 16 }
/// sub rsp, 32
/// mov [rsp+16], r12
/// unwind UnwindInst::SaveReg { reg: R12, clobber_offset: 0 }
/// mov [rsp+24], r13
/// unwind UnwindInst::SaveReg { reg: R13, clobber_offset: 8 }
/// ...
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum UnwindInst {
/// The frame-pointer register for this architecture has just been pushed to
/// the stack (and on architectures where return-addresses are not pushed by
/// hardware, the link register as well). The FP has not been set to this
/// frame yet. The current location of SP is such that
/// `offset_upward_to_caller_sp` is the distance to SP-at-callsite (our
/// caller's frame).
PushFrameRegs {
/// The offset from the current SP (after push) to the SP at
/// caller's callsite.
offset_upward_to_caller_sp: u32,
},
/// The frame-pointer register for this architecture has just been
/// set to the current stack location. We wish to define a new
/// frame that is anchored on this new FP value. Offsets are provided
/// upward to the caller's stack frame and downward toward the clobber
/// area. We expect this pseudo-op to come after `PushFrameRegs`.
DefineNewFrame {
/// The offset from the current SP and FP value upward to the value of
/// SP at the callsite that invoked us.
offset_upward_to_caller_sp: u32,
/// The offset from the current SP and FP value downward to the start of
/// the clobber area.
offset_downward_to_clobbers: u32,
},
/// The stack slot at the given offset from the clobber-area base has been
/// used to save the given register.
///
/// Given that `DefineNewFrame` has occurred first with some
/// `offset_downward_to_clobbers`, `SaveReg` with `clobber_offset` indicates
/// that the value of `reg` is saved on the stack at address `FP -
/// offset_downward_to_clobbers + clobber_offset`.
SaveReg {
/// The offset from the start of the clobber area to this register's
/// stack location.
clobber_offset: u32,
/// The saved register.
reg: RealReg,
},
/// Defines whether the aarch64-specific pointer authentication available on
/// ARMv8.3+ devices is enabled for certain pointers.
Aarch64SetPointerAuth {
/// Whether return addresses (held in LR) contain a pointer-authentication code.
return_addresses: bool,
},
}


@ -1,6 +1,8 @@
//! System V ABI unwind information.
use crate::binemit::CodeOffset;
use crate::isa::unwind::input;
use crate::isa::unwind::UnwindInst;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use gimli::write::{Address, FrameDescriptionEntry};
@ -42,6 +44,11 @@ pub(crate) enum CallFrameInstruction {
RememberState,
RestoreState,
ArgsSize(u32),
/// Enables or disables pointer authentication on aarch64 platforms with ARMv8.3 and later.
/// This particular item maps to `gimli::ValExpression(RA_SIGN_STATE, lit0/lit1)`.
Aarch64SetPointerAuth {
return_addresses: bool,
},
}
impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
@ -73,7 +80,7 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
fn into(self) -> gimli::write::CallFrameInstruction {
use gimli::{write::CallFrameInstruction, Register};
use gimli::{write::CallFrameInstruction, write::Expression, Register};
match self {
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
@ -90,6 +97,21 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
Self::RememberState => CallFrameInstruction::RememberState,
Self::RestoreState => CallFrameInstruction::RestoreState,
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
Self::Aarch64SetPointerAuth { return_addresses } => {
// To enable pointer authentication for return addresses in dwarf directives, we
// use a small dwarf expression that sets the value of the pseudo-register
// RA_SIGN_STATE (RA stands for return address) to 0 or 1. This behavior is
// documented in
// https://github.com/ARM-software/abi-aa/blob/master/aadwarf64/aadwarf64.rst#41dwarf-register-names.
let mut expr = Expression::new();
expr.op(if return_addresses {
gimli::DW_OP_lit1
} else {
gimli::DW_OP_lit0
});
const RA_SIGN_STATE: Register = Register(34);
CallFrameInstruction::ValExpression(RA_SIGN_STATE, expr)
}
}
}
}
@ -100,6 +122,16 @@ pub(crate) trait RegisterMapper<Reg> {
fn map(&self, reg: Reg) -> Result<Register, RegisterMappingError>;
/// Gets stack pointer register.
fn sp(&self) -> Register;
/// Gets the frame pointer register.
fn fp(&self) -> Register;
/// Gets the link register, if any.
fn lr(&self) -> Option<Register> {
None
}
/// Gets the offset from the saved FP to the saved LR, if any.
fn lr_offset(&self) -> Option<u32> {
None
}
}
/// Represents unwind information for a single System V ABI function.
@ -112,7 +144,88 @@ pub struct UnwindInfo {
len: u32,
}
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
insts: &[(CodeOffset, UnwindInst)],
code_len: usize,
mr: &MR,
) -> CodegenResult<UnwindInfo> {
let mut instructions = vec![];
let mut clobber_offset_to_cfa = 0;
for &(instruction_offset, ref inst) in insts {
match inst {
&UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp,
} => {
// Define CFA in terms of current SP (SP changed and we haven't
// set FP yet).
instructions.push((
instruction_offset,
CallFrameInstruction::CfaOffset(offset_upward_to_caller_sp as i32),
));
// Note that we saved the old FP value on the stack.
instructions.push((
instruction_offset,
CallFrameInstruction::Offset(mr.fp(), -(offset_upward_to_caller_sp as i32)),
));
// If there is a link register on this architecture, note that
// we saved it as well.
if let Some(lr) = mr.lr() {
instructions.push((
instruction_offset,
CallFrameInstruction::Offset(
lr,
-(offset_upward_to_caller_sp as i32)
+ mr.lr_offset().expect("LR offset not provided") as i32,
),
));
}
}
&UnwindInst::DefineNewFrame {
offset_upward_to_caller_sp,
offset_downward_to_clobbers,
} => {
// Define CFA in terms of FP. Note that we assume it was already
// defined correctly in terms of the current SP, and FP has just
// been set to the current SP, so we do not need to change the
// offset, only the register.
instructions.push((
instruction_offset,
CallFrameInstruction::CfaRegister(mr.fp()),
));
// Record distance from CFA downward to clobber area so we can
// express clobber offsets later in terms of CFA.
clobber_offset_to_cfa = offset_upward_to_caller_sp + offset_downward_to_clobbers;
}
&UnwindInst::SaveReg {
clobber_offset,
reg,
} => {
let reg = mr
.map(reg.to_reg())
.map_err(|e| CodegenError::RegisterMappingError(e))?;
let off = (clobber_offset as i32) - (clobber_offset_to_cfa as i32);
instructions.push((instruction_offset, CallFrameInstruction::Offset(reg, off)));
}
&UnwindInst::Aarch64SetPointerAuth { return_addresses } => {
instructions.push((
instruction_offset,
CallFrameInstruction::Aarch64SetPointerAuth { return_addresses },
));
}
}
}
Ok(UnwindInfo {
instructions,
len: code_len as u32,
})
}
impl UnwindInfo {
// TODO: remove `build()` below when old backend is removed. The new backend uses a simpler
// approach in `create_unwind_info_from_insts()` above.
pub(crate) fn build<'b, Reg: PartialEq + Copy>(
unwind: input::UnwindInfo<Reg>,
map_reg: &'b dyn RegisterMapper<Reg>,
@ -158,6 +271,9 @@ impl UnwindInfo {
UnwindCode::RestoreState => {
builder.restore_state(*offset);
}
UnwindCode::Aarch64SetPointerAuth { return_addresses } => {
builder.set_aarch64_pauth(*offset, *return_addresses);
}
}
}
@ -179,6 +295,8 @@ impl UnwindInfo {
}
}
// TODO: delete the builder below when the old backend is removed.
struct InstructionBuilder<'a, Reg: PartialEq + Copy> {
sp_offset: i32,
frame_register: Option<Reg>,
@ -310,4 +428,11 @@ impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> {
self.instructions
.push((offset, CallFrameInstruction::RestoreState));
}
fn set_aarch64_pauth(&mut self, offset: u32, return_addresses: bool) {
self.instructions.push((
offset,
CallFrameInstruction::Aarch64SetPointerAuth { return_addresses },
));
}
}

View file

@ -1,6 +1,6 @@
//! Windows x64 ABI unwind information.
use crate::isa::{unwind::input, RegUnit};
use crate::isa::unwind::input;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use byteorder::{ByteOrder, LittleEndian};
@ -8,6 +8,9 @@ use log::warn;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
use crate::binemit::CodeOffset;
use crate::isa::unwind::UnwindInst;
/// Maximum (inclusive) size of a "small" stack allocation
const SMALL_ALLOC_MAX_SIZE: u32 = 128;
/// Maximum (inclusive) size of a "large" stack allocation that can represented in 16-bits
@ -41,25 +44,34 @@ impl<'a> Writer<'a> {
/// The supported unwind codes for the x64 Windows ABI.
///
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// See: <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA is represented here.
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub(crate) enum UnwindCode {
PushRegister {
offset: u8,
instruction_offset: u8,
reg: u8,
},
SaveReg {
instruction_offset: u8,
reg: u8,
stack_offset: u32,
},
SaveXmm {
offset: u8,
instruction_offset: u8,
reg: u8,
stack_offset: u32,
},
StackAlloc {
offset: u8,
instruction_offset: u8,
size: u32,
},
SetFPReg {
instruction_offset: u8,
},
}
impl UnwindCode {
@ -68,37 +80,63 @@ impl UnwindCode {
PushNonvolatileRegister = 0,
LargeStackAlloc = 1,
SmallStackAlloc = 2,
SetFPReg = 3,
SaveNonVolatileRegister = 4,
SaveNonVolatileRegisterFar = 5,
SaveXmm128 = 8,
SaveXmm128Far = 9,
}
match self {
Self::PushRegister { offset, reg } => {
writer.write_u8(*offset);
Self::PushRegister {
instruction_offset,
reg,
} => {
writer.write_u8(*instruction_offset);
writer.write_u8((*reg << 4) | (UnwindOperation::PushNonvolatileRegister as u8));
}
Self::SaveXmm {
offset,
Self::SaveReg {
instruction_offset,
reg,
stack_offset,
}
| Self::SaveXmm {
instruction_offset,
reg,
stack_offset,
} => {
writer.write_u8(*offset);
let is_xmm = match self {
Self::SaveXmm { .. } => true,
_ => false,
};
let (op_small, op_large) = if is_xmm {
(UnwindOperation::SaveXmm128, UnwindOperation::SaveXmm128Far)
} else {
(
UnwindOperation::SaveNonVolatileRegister,
UnwindOperation::SaveNonVolatileRegisterFar,
)
};
writer.write_u8(*instruction_offset);
let scaled_stack_offset = stack_offset / 16;
if scaled_stack_offset <= core::u16::MAX as u32 {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
writer.write_u8((*reg << 4) | (op_small as u8));
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
} else {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
writer.write_u8((*reg << 4) | (op_large as u8));
writer.write_u16::<LittleEndian>(*stack_offset as u16);
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
}
}
Self::StackAlloc { offset, size } => {
Self::StackAlloc {
instruction_offset,
size,
} => {
// Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
assert!(*size >= 8);
assert!((*size % 8) == 0);
writer.write_u8(*offset);
writer.write_u8(*instruction_offset);
if *size <= SMALL_ALLOC_MAX_SIZE {
writer.write_u8(
((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
@ -111,7 +149,11 @@ impl UnwindCode {
writer.write_u32::<LittleEndian>(*size);
}
}
};
Self::SetFPReg { instruction_offset } => {
writer.write_u8(*instruction_offset);
writer.write_u8(UnwindOperation::SetFPReg as u8);
}
}
}
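A quick worked example (a sketch, not part of the patch) of the two-byte node produced for a small stack allocation, mirroring the `SmallStackAlloc` arm above:

fn small_stack_alloc_node(instruction_offset: u8, size: u32) -> [u8; 2] {
    const SMALL_STACK_ALLOC: u8 = 2; // UnwindOperation::SmallStackAlloc
    assert!(size >= 8 && size % 8 == 0 && size <= 128);
    // The operation-info nibble encodes (size / 8) - 1.
    [instruction_offset, ((((size - 8) / 8) as u8) << 4) | SMALL_STACK_ALLOC]
}

// e.g. a 40-byte allocation at prologue offset 4 yields [0x04, 0x42].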
fn node_count(&self) -> usize {
@ -125,7 +167,7 @@ impl UnwindCode {
3
}
}
Self::SaveXmm { stack_offset, .. } => {
Self::SaveXmm { stack_offset, .. } | Self::SaveReg { stack_offset, .. } => {
if *stack_offset <= core::u16::MAX as u32 {
2
} else {
@ -143,15 +185,15 @@ pub(crate) enum MappedRegister {
}
/// Maps UnwindInfo register to Windows x64 unwind data.
pub(crate) trait RegisterMapper {
/// Maps RegUnit.
fn map(reg: RegUnit) -> MappedRegister;
pub(crate) trait RegisterMapper<Reg> {
/// Maps a Reg to a Windows unwind register number.
fn map(reg: Reg) -> MappedRegister;
}
/// Represents Windows x64 unwind information.
///
/// For information about Windows x64 unwind info, see:
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct UnwindInfo {
@ -219,8 +261,11 @@ impl UnwindInfo {
.fold(0, |nodes, c| nodes + c.node_count())
}
pub(crate) fn build<MR: RegisterMapper>(
unwind: input::UnwindInfo<RegUnit>,
// TODO: remove `build()` below when old backend is removed. The new backend uses
// a simpler approach in `create_unwind_info_from_insts()` below.
pub(crate) fn build<Reg: PartialEq + Copy + std::fmt::Debug, MR: RegisterMapper<Reg>>(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Self> {
use crate::isa::unwind::input::UnwindCode as InputUnwindCode;
@ -237,7 +282,7 @@ impl UnwindInfo {
// `StackAlloc { size = word_size }`, `SaveRegister { stack_offset: 0 }`
// to the shorter `UnwindCode::PushRegister`.
let push_reg_sequence = if let Some(UnwindCode::StackAlloc {
offset: alloc_offset,
instruction_offset: alloc_offset,
size,
}) = unwind_codes.last()
{
@ -246,19 +291,21 @@ impl UnwindInfo {
false
};
if push_reg_sequence {
*unwind_codes.last_mut().unwrap() =
UnwindCode::PushRegister { offset, reg };
*unwind_codes.last_mut().unwrap() = UnwindCode::PushRegister {
instruction_offset: offset,
reg,
};
} else {
// TODO add `UnwindCode::SaveRegister` to handle multiple register
// pushes with single `UnwindCode::StackAlloc`.
return Err(CodegenError::Unsupported(
"Unsupported UnwindCode::PushRegister sequence".into(),
));
unwind_codes.push(UnwindCode::SaveReg {
instruction_offset: offset,
reg,
stack_offset: *stack_offset,
});
}
}
MappedRegister::Xmm(reg) => {
unwind_codes.push(UnwindCode::SaveXmm {
offset,
instruction_offset: offset,
reg,
stack_offset: *stack_offset,
});
@ -267,7 +314,7 @@ impl UnwindInfo {
}
InputUnwindCode::StackAlloc { size } => {
unwind_codes.push(UnwindCode::StackAlloc {
offset: ensure_unwind_offset(*offset)?,
instruction_offset: ensure_unwind_offset(*offset)?,
size: *size,
});
}
@ -285,6 +332,65 @@ impl UnwindInfo {
}
}
const UNWIND_RBP_REG: u8 = 5;
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
insts: &[(CodeOffset, UnwindInst)],
) -> CodegenResult<UnwindInfo> {
let mut unwind_codes = vec![];
let mut frame_register_offset = 0;
let mut max_unwind_offset = 0;
for &(instruction_offset, ref inst) in insts {
let instruction_offset = ensure_unwind_offset(instruction_offset)?;
match inst {
&UnwindInst::PushFrameRegs { .. } => {
unwind_codes.push(UnwindCode::PushRegister {
instruction_offset,
reg: UNWIND_RBP_REG,
});
}
&UnwindInst::DefineNewFrame {
offset_downward_to_clobbers,
..
} => {
frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?;
unwind_codes.push(UnwindCode::SetFPReg { instruction_offset });
}
&UnwindInst::SaveReg {
clobber_offset,
reg,
} => match MR::map(reg.to_reg()) {
MappedRegister::Int(reg) => {
unwind_codes.push(UnwindCode::SaveReg {
instruction_offset,
reg,
stack_offset: clobber_offset,
});
}
MappedRegister::Xmm(reg) => {
unwind_codes.push(UnwindCode::SaveXmm {
instruction_offset,
reg,
stack_offset: clobber_offset,
});
}
},
&UnwindInst::Aarch64SetPointerAuth { .. } => {
unreachable!("no aarch64 on x64");
}
}
max_unwind_offset = instruction_offset;
}
Ok(UnwindInfo {
flags: 0,
prologue_size: max_unwind_offset,
frame_register: Some(UNWIND_RBP_REG),
frame_register_offset,
unwind_codes,
})
}
fn ensure_unwind_offset(offset: u32) -> CodegenResult<u8> {
if offset > 255 {
warn!("function prologues cannot exceed 255 bytes in size for Windows x64");

View file

@ -1,9 +1,9 @@
//! Implementation of the standard x64 ABI.
use crate::ir::types::*;
use crate::ir::{self, types, MemFlags, TrapCode, Type};
use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
use crate::isa;
use crate::isa::{x64::inst::*, CallConv};
use crate::isa::{unwind::UnwindInst, x64::inst::*, CallConv};
use crate::machinst::abi_impl::*;
use crate::machinst::*;
use crate::settings;
@ -31,7 +31,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
regs::r14().to_real_reg(),
types::I64,
param.extension,
@ -40,7 +40,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
regs::r10().to_real_reg(),
types::I64,
param.extension,
@ -50,7 +50,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -60,7 +60,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn compute_arg_locs(
call_conv: isa::CallConv,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
let is_baldrdash = call_conv.extends_baldrdash();
let is_fastcall = call_conv.extends_windows_fastcall();
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
let mut next_gpr = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
let mut next_param_idx = 0; // Fastcall cares about overall param index
let mut ret = vec![];
if args_or_rets == ArgsOrRets::Args && is_fastcall {
// Fastcall always reserves 32 bytes of shadow space corresponding to
// the four initial in-arg parameters.
//
// (See:
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
next_stack = 32;
}
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
// caller TLS-register values, respectively.
@ -131,65 +143,120 @@ impl ABIMachineSpec for X64ABIMachineSpec {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::CallerTLS => {}
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
),
}
let intreg = in_int_reg(param.value_type);
let vecreg = in_vec_reg(param.value_type);
debug_assert!(intreg || vecreg);
debug_assert!(!(intreg && vecreg));
let (next_reg, candidate) = if intreg {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::I64)
.unwrap_or(true));
(&mut next_gpr, candidate)
} else {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::V128)
.unwrap_or(true));
(&mut next_vreg, candidate)
};
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(intreg);
ret.push(param);
} else if let Some(reg) = candidate {
ret.push(ABIArg::Reg(
reg.to_real_reg(),
param.value_type,
param.extension,
param.purpose,
));
*next_reg += 1;
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (param.value_type.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack as i64,
param.value_type,
param.extension,
param.purpose,
));
next_stack += size;
continue;
}
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
// Now assign ABIArgSlots for each register-sized part.
//
// Note that the handling of `i128` values is unique here:
//
// - If `enable_llvm_abi_extensions` is set in the flags, each
// `i128` is split into two `i64`s and assigned exactly as if it
// were two consecutive 64-bit args. This is consistent with LLVM's
// behavior, and is needed for some uses of Cranelift (e.g., the
// rustc backend).
//
// - Otherwise, both SysV and Fastcall specify behavior (use of
// vector register, a register pair, or passing by reference
// depending on the case), but for simplicity, we will just panic if
// an i128 type appears in a signature and the LLVM extensions flag
// is not set.
//
// For examples of how rustc compiles i128 args and return values on
// both SysV and Fastcall platforms, see:
// https://godbolt.org/z/PhG3ob
if param.value_type.bits() > 64
&& !param.value_type.is_vector()
&& !flags.enable_llvm_abi_extensions()
{
panic!(
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
);
}
let mut slots = vec![];
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let intreg = *rc == RegClass::I64;
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => {
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
}
ArgsOrRets::Rets => {
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
}
}
} else {
match args_or_rets {
ArgsOrRets::Args => {
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
}
ArgsOrRets::Rets => {
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
}
}
};
next_param_idx += 1;
if let Some(reg) = nextreg {
if intreg {
next_gpr += 1;
} else {
next_vreg += 1;
}
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg(),
ty: *reg_ty,
extension: param.extension,
});
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (reg_ty.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
slots.push(ABIArgSlot::Stack {
offset: next_stack as i64,
ty: *reg_ty,
extension: param.extension,
});
next_stack += size;
}
}
ret.push(ABIArg::Slots {
slots,
purpose: param.purpose,
});
}
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
@ -198,15 +265,15 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
ret.push(ABIArg::Reg(
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
ret.push(ABIArg::reg(
reg.to_real_reg(),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
next_stack as i64,
types::I64,
ir::ArgumentExtension::None,
@ -219,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
// To avoid overflow issues, limit the arg/return size to something reasonable.
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
@ -288,13 +355,13 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::epilogue_placeholder()
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Self::I> {
let mut ret = SmallVec::new();
if from_reg != into_reg.to_reg() {
ret.push(Inst::gen_move(into_reg, from_reg, I64));
}
ret.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(imm),
into_reg,
@ -302,9 +369,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
ret
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
smallvec![
Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg),
Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(regs::rsp()), limit_reg),
Inst::TrapIf {
// NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
cc: CC::NBE,
@ -343,7 +410,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::store(ty, from_reg, mem)
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
let (alu_op, amount) = if amount >= 0 {
(AluRmiROpcode::Add, amount)
} else {
@ -353,7 +420,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let amount = amount as u32;
smallvec![Inst::alu_rmi_r(
true,
OperandSize::Size64,
alu_op,
RegMemImm::imm(amount),
Writable::from_reg(regs::rsp()),
@ -366,71 +433,126 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
}
fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> {
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I> {
let r_rsp = regs::rsp();
let r_rbp = regs::rbp();
let w_rbp = Writable::from_reg(r_rbp);
let mut insts = SmallVec::new();
// `push %rbp`
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::push64(RegMemImm::reg(r_rbp)));
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// `mov %rsp, %rbp`
// RSP is now 0 % 16
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));
insts
}
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
// `mov %rbp, %rsp`
insts.push(Inst::mov_r_r(
true,
OperandSize::Size64,
regs::rbp(),
Writable::from_reg(regs::rsp()),
));
// `pop %rbp`
insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
insts
}
fn gen_probestack(frame_size: u32) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: vec![regs::rax()],
defs: vec![],
opcode: Opcode::Call,
});
insts
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
let mut insts = SmallVec::new();
// Find all clobbered registers that are callee-save. These are only I64
// registers (all XMM registers are caller-save) so we can compute the
// total size of the needed stack space easily.
// Find all clobbered registers that are callee-save.
let clobbered = get_callee_saves(&call_conv, clobbers);
let clobbered_size = 8 * clobbered.len() as u32;
let stack_size = clobbered_size + fixed_frame_storage_size;
// Align to 16 bytes.
let stack_size = (stack_size + 15) & !15;
// Adjust the stack pointer downward with one `sub rsp, IMM`
// instruction.
let clobbered_size = compute_clobber_size(&clobbered);
if flags.unwind_info() {
// Emit unwind info: start the frame. The frame (from unwind
// consumers' point of view) starts at clobbers, just below
// the FP and return address. Spill slots and stack slots are
// part of our actual frame but do not concern the unwinder.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobbered_size,
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// Adjust the stack pointer downward for clobbers and the function fixed
// frame (spillslots and storage slots).
let stack_size = fixed_frame_storage_size + clobbered_size;
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
));
}
// Store each clobbered register in order at offsets from RSP.
let mut cur_offset = 0;
// Store each clobbered register in order at offsets from RSP,
// placing them above the fixed frame slots.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.get_class() {
RegClass::I64 => {
insts.push(Inst::mov_r_m(
/* bytes = */ 8,
insts.push(Inst::store(
types::I64,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 8;
}
// No XMM regs are callee-save, so we do not need to implement
// this.
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::store(
types::I8X16,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 16;
}
_ => unreachable!(),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: off - fixed_frame_storage_size,
reg: r_reg,
},
});
}
}
@ -441,17 +563,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
fixed_frame_storage_size: u32,
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
let clobbered = get_callee_saves(&call_conv, clobbers);
let stack_size = 8 * clobbered.len() as u32;
let stack_size = (stack_size + 15) & !15;
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered);
// Restore regs by loading from offsets of RSP.
let mut cur_offset = 0;
// Restore regs by loading from offsets of RSP. RSP will be
// returned to nominal-RSP at this point, so we can use the
// same offsets that we used when saving clobbers above.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
let rreg = reg.to_reg();
match rreg.get_class() {
@ -462,13 +584,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
));
cur_offset += 8;
}
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::load(
types::I8X16,
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.to_reg()),
ExtKind::None,
));
cur_offset += 16;
}
_ => unreachable!(),
}
}
// Adjust RSP back upward.
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
@ -531,6 +663,51 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
fn gen_memcpy(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
size: usize,
) -> SmallVec<[Self::I; 8]> {
// Baldrdash should not use struct args.
assert!(!call_conv.extends_baldrdash());
let mut insts = SmallVec::new();
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
// We need a register to load the address of `memcpy()` below and we
// don't have a lowering context to allocate a temp here; so just use a
// register we know we are free to mutate as part of this sequence
// (because it is clobbered by the call as per the ABI anyway).
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
insts.extend(
Inst::gen_constant(
ValueRegs::one(Writable::from_reg(arg2)),
size as u128,
I64,
|_| panic!("tmp should not be needed"),
)
.into_iter(),
);
// We use an indirect call and a full LoadExtName because we do not have
// information about the libcall `RelocDistance` here, so we
// conservatively use the more flexible calling sequence.
insts.push(Inst::LoadExtName {
dst: Writable::from_reg(memcpy_addr),
name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
offset: 0,
});
insts.push(Inst::call_unknown(
RegMem::reg(memcpy_addr),
/* uses = */ vec![arg0, arg1, arg2],
/* defs = */ Self::get_regs_clobbered_by_call(call_conv),
Opcode::Call,
));
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
@ -551,10 +728,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = vec![
// Systemv calling convention:
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
Writable::from_reg(regs::rsi()),
Writable::from_reg(regs::rdi()),
// intersection of Systemv and FastCall calling conventions:
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
// SysV adds RDI, RSI (FastCall makes these callee-saved).
Writable::from_reg(regs::rax()),
Writable::from_reg(regs::rcx()),
Writable::from_reg(regs::rdx()),
@ -562,25 +738,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::r9()),
Writable::from_reg(regs::r10()),
Writable::from_reg(regs::r11()),
// - XMM: all the registers!
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
Writable::from_reg(regs::xmm0()),
Writable::from_reg(regs::xmm1()),
Writable::from_reg(regs::xmm2()),
Writable::from_reg(regs::xmm3()),
Writable::from_reg(regs::xmm4()),
Writable::from_reg(regs::xmm5()),
Writable::from_reg(regs::xmm6()),
Writable::from_reg(regs::xmm7()),
Writable::from_reg(regs::xmm8()),
Writable::from_reg(regs::xmm9()),
Writable::from_reg(regs::xmm10()),
Writable::from_reg(regs::xmm11()),
Writable::from_reg(regs::xmm12()),
Writable::from_reg(regs::xmm13()),
Writable::from_reg(regs::xmm14()),
Writable::from_reg(regs::xmm15()),
];
if !call_conv_of_callee.extends_windows_fastcall() {
caller_saved.push(Writable::from_reg(regs::rsi()));
caller_saved.push(Writable::from_reg(regs::rdi()));
caller_saved.push(Writable::from_reg(regs::xmm6()));
caller_saved.push(Writable::from_reg(regs::xmm7()));
caller_saved.push(Writable::from_reg(regs::xmm8()));
caller_saved.push(Writable::from_reg(regs::xmm9()));
caller_saved.push(Writable::from_reg(regs::xmm10()));
caller_saved.push(Writable::from_reg(regs::xmm11()));
caller_saved.push(Writable::from_reg(regs::xmm12()));
caller_saved.push(Writable::from_reg(regs::xmm13()));
caller_saved.push(Writable::from_reg(regs::xmm14()));
caller_saved.push(Writable::from_reg(regs::xmm15()));
}
if call_conv_of_callee.extends_baldrdash() {
caller_saved.push(Writable::from_reg(regs::r12()));
caller_saved.push(Writable::from_reg(regs::r13()));
@ -591,6 +772,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
caller_saved
}
fn get_ext_mode(
call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
if call_conv.extends_baldrdash() {
// Baldrdash (SpiderMonkey) always extends args and return values to the full register.
specified
} else {
// No other supported ABI on x64 does so.
ir::ArgumentExtension::None
}
}
}
impl From<StackAMode> for SyntheticAmode {
@ -629,74 +823,67 @@ impl From<StackAMode> for SyntheticAmode {
}
}
fn in_int_reg(ty: types::Type) -> bool {
match ty {
types::I8
| types::I16
| types::I32
| types::I64
| types::B1
| types::B8
| types::B16
| types::B32
| types::B64
| types::R64 => true,
types::R32 => panic!("unexpected 32-bits refs on x64!"),
_ => false,
}
}
fn in_vec_reg(ty: types::Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ if ty.is_vector() => true,
_ => false,
}
}
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("int args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::rdi()),
(1, false) => Some(regs::rsi()),
(2, false) => Some(regs::rdx()),
(3, false) => Some(regs::rcx()),
(4, false) => Some(regs::r8()),
(5, false) => Some(regs::r9()),
(0, true) => Some(regs::rcx()),
(1, true) => Some(regs::rdx()),
(2, true) => Some(regs::r8()),
(3, true) => Some(regs::r9()),
_ => None,
}
}
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("float args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (floating-point) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::xmm0()),
(1, false) => Some(regs::xmm1()),
(2, false) => Some(regs::xmm2()),
(3, false) => Some(regs::xmm3()),
(4, false) => Some(regs::xmm4()),
(5, false) => Some(regs::xmm5()),
(6, false) => Some(regs::xmm6()),
(7, false) => Some(regs::xmm7()),
(0, true) => Some(regs::xmm0()),
(1, true) => Some(regs::xmm1()),
(2, true) => Some(regs::xmm2()),
(3, true) => Some(regs::xmm3()),
_ => None,
}
}
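To make the counting difference concrete, a sketch (not part of the patch) tracing a `(i64, f64, i64)` signature through the two helpers:

// SysV counts per register class (`idx`); fastcall counts absolute params (`arg_idx`).
assert_eq!(get_intreg_for_arg(&CallConv::SystemV, 0, 0), Some(regs::rdi()));
assert_eq!(get_fltreg_for_arg(&CallConv::SystemV, 0, 1), Some(regs::xmm0()));
assert_eq!(get_intreg_for_arg(&CallConv::SystemV, 1, 2), Some(regs::rsi()));
assert_eq!(get_intreg_for_arg(&CallConv::WindowsFastcall, 0, 0), Some(regs::rcx()));
assert_eq!(get_fltreg_for_arg(&CallConv::WindowsFastcall, 0, 1), Some(regs::xmm1()));
assert_eq!(get_intreg_for_arg(&CallConv::WindowsFastcall, 1, 2), Some(regs::r8()));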
fn get_intreg_for_retval_systemv(
fn get_intreg_for_retval(
call_conv: &CallConv,
intreg_idx: usize,
retval_idx: usize,
@ -714,11 +901,17 @@ fn get_intreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
fn get_fltreg_for_retval_systemv(
fn get_fltreg_for_retval(
call_conv: &CallConv,
fltreg_idx: usize,
retval_idx: usize,
@ -736,7 +929,12 @@ fn get_fltreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match fltreg_idx {
0 => Some(regs::xmm0()),
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
@ -769,6 +967,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
}
}
fn is_callee_save_fastcall(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => match r.get_hw_encoding() as u8 {
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
_ => false,
},
_ => panic!("Unknown register class: {:?}", r.get_class()),
}
}
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
@ -784,11 +997,33 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => todo!("windows fastcall"),
CallConv::WindowsFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().get_class() {
RegClass::I64 => {
clobbered_size += 8;
}
RegClass::V128 => {
clobbered_size = align_to(clobbered_size, 16);
clobbered_size += 16;
}
_ => unreachable!(),
}
}
align_to(clobbered_size, 16)
}
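`align_to` here is assumed to be the usual round-up-to-alignment helper; a sketch of that assumed behavior plus a worked example for the function above:

// Assumed behavior of `align_to` (`align` must be a power of two).
fn align_to_sketch(x: u32, align: u32) -> u32 {
    debug_assert!(align.is_power_of_two());
    (x + align - 1) & !(align - 1)
}

// Worked example for clobbers = [rbx (I64), xmm6 (V128)]:
//   rbx:  clobbered_size = 8
//   xmm6: align_to(8, 16) = 16, then += 16 -> 32
//   final align_to(32, 16) = 32 bytes of clobber area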

View file

@ -3,7 +3,8 @@
use super::regs::{self, show_ireg_sized};
use super::EmitState;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::MemFlags;
use crate::ir::{MemFlags, Type};
use crate::isa::x64::inst::Inst;
use crate::machinst::*;
use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
@ -144,7 +145,7 @@ impl PrettyPrint for Amode {
/// A Memory Address. These denote a 64-bit value only.
/// Used for usual addressing modes as well as addressing modes used during compilation, when the
/// moving SP offset is not known.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum SyntheticAmode {
/// A real amode.
Real(Amode),
@ -152,6 +153,9 @@ pub enum SyntheticAmode {
/// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
/// within the function.
NominalSPOffset { simm32: u32 },
/// A virtual offset to a constant that will be emitted in the constant section of the buffer.
ConstantOffset(VCodeConstant),
}
impl SyntheticAmode {
@ -166,6 +170,7 @@ impl SyntheticAmode {
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do; the base is SP and isn't involved in regalloc.
}
SyntheticAmode::ConstantOffset(_) => {}
}
}
@ -175,10 +180,11 @@ impl SyntheticAmode {
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do.
}
SyntheticAmode::ConstantOffset(_) => {}
}
}
pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer<Inst>) -> Amode {
match self {
SyntheticAmode::Real(addr) => addr.clone(),
SyntheticAmode::NominalSPOffset { simm32 } => {
@ -190,6 +196,9 @@ impl SyntheticAmode {
);
Amode::imm_reg(off as u32, regs::rsp())
}
SyntheticAmode::ConstantOffset(c) => {
Amode::rip_relative(buffer.get_label_for_constant(*c))
}
}
}
}
@ -207,6 +216,7 @@ impl PrettyPrint for SyntheticAmode {
SyntheticAmode::NominalSPOffset { simm32 } => {
format!("rsp({} + virtual offset)", *simm32 as i32)
}
SyntheticAmode::ConstantOffset(c) => format!("const({:?})", c),
}
}
}
@ -276,7 +286,7 @@ impl PrettyPrintSized for RegMemImm {
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32, 64, or 128 bit value.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum RegMem {
Reg { reg: Reg },
Mem { addr: SyntheticAmode },
@ -336,23 +346,35 @@ impl PrettyPrintSized for RegMem {
#[derive(Copy, Clone, PartialEq)]
pub enum AluRmiROpcode {
Add,
Adc,
Sub,
Sbb,
And,
Or,
Xor,
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
Mul,
/// 8-bit form of And. Handled separately as we don't have full 8-bit op
/// support (we just use wider instructions). Used only with some sequences
/// with SETcc.
And8,
/// 8-bit form of Or.
Or8,
}
impl fmt::Debug for AluRmiROpcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
AluRmiROpcode::Add => "add",
AluRmiROpcode::Adc => "adc",
AluRmiROpcode::Sub => "sub",
AluRmiROpcode::Sbb => "sbb",
AluRmiROpcode::And => "and",
AluRmiROpcode::Or => "or",
AluRmiROpcode::Xor => "xor",
AluRmiROpcode::Mul => "imul",
AluRmiROpcode::And8 => "and",
AluRmiROpcode::Or8 => "or",
};
write!(fmt, "{}", name)
}
@ -364,12 +386,39 @@ impl fmt::Display for AluRmiROpcode {
}
}
impl AluRmiROpcode {
/// Is this a special-cased 8-bit ALU op?
pub fn is_8bit(self) -> bool {
match self {
AluRmiROpcode::And8 | AluRmiROpcode::Or8 => true,
_ => false,
}
}
}
#[derive(Clone, PartialEq)]
pub enum UnaryRmROpcode {
/// Bit-scan reverse.
Bsr,
/// Bit-scan forward.
Bsf,
/// Counts leading zeroes (Leading Zero CouNT).
Lzcnt,
/// Counts trailing zeroes (Trailing Zero CouNT).
Tzcnt,
/// Counts the number of ones (POPulation CouNT).
Popcnt,
}
impl UnaryRmROpcode {
pub(crate) fn available_from(&self) -> Option<InstructionSet> {
match self {
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => None,
UnaryRmROpcode::Lzcnt => Some(InstructionSet::Lzcnt),
UnaryRmROpcode::Tzcnt => Some(InstructionSet::BMI1),
UnaryRmROpcode::Popcnt => Some(InstructionSet::Popcnt),
}
}
}
impl fmt::Debug for UnaryRmROpcode {
@ -377,6 +426,9 @@ impl fmt::Debug for UnaryRmROpcode {
match self {
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
UnaryRmROpcode::Tzcnt => write!(fmt, "tzcnt"),
UnaryRmROpcode::Popcnt => write!(fmt, "popcnt"),
}
}
}
@ -387,16 +439,30 @@ impl fmt::Display for UnaryRmROpcode {
}
}
#[derive(Clone, Copy, PartialEq)]
pub enum CmpOpcode {
/// CMP instruction: compute `a - b` and set flags from result.
Cmp,
/// TEST instruction: compute `a & b` and set flags from result.
Test,
}
pub(crate) enum InstructionSet {
SSE,
SSE2,
SSSE3,
SSE41,
SSE42,
Popcnt,
Lzcnt,
BMI1,
#[allow(dead_code)] // never constructed (yet).
BMI2,
}
/// Some SSE operations requiring 2 operands r/m and r.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)] // some variants here aren't used just yet
pub enum SseOpcode {
Addps,
Addpd,
@ -406,6 +472,7 @@ pub enum SseOpcode {
Andpd,
Andnps,
Andnpd,
Blendvpd,
Comiss,
Comisd,
Cmpps,
@ -413,6 +480,7 @@ pub enum SseOpcode {
Cmpss,
Cmpsd,
Cvtdq2ps,
Cvtdq2pd,
Cvtsd2ss,
Cvtsd2si,
Cvtsi2ss,
@ -457,7 +525,10 @@ pub enum SseOpcode {
Pabsb,
Pabsw,
Pabsd,
Packssdw,
Packsswb,
Packusdw,
Packuswb,
Paddb,
Paddd,
Paddq,
@ -466,6 +537,7 @@ pub enum SseOpcode {
Paddsw,
Paddusb,
Paddusw,
Palignr,
Pand,
Pandn,
Pavgb,
@ -484,6 +556,7 @@ pub enum SseOpcode {
Pinsrb,
Pinsrw,
Pinsrd,
Pmaddwd,
Pmaxsb,
Pmaxsw,
Pmaxsd,
@ -497,6 +570,18 @@ pub enum SseOpcode {
Pminuw,
Pminud,
Pmovmskb,
Pmovsxbd,
Pmovsxbw,
Pmovsxbq,
Pmovsxwd,
Pmovsxwq,
Pmovsxdq,
Pmovzxbd,
Pmovzxbw,
Pmovzxbq,
Pmovzxwd,
Pmovzxwq,
Pmovzxdq,
Pmulld,
Pmullw,
Pmuludq,
@ -520,8 +605,12 @@ pub enum SseOpcode {
Psubusb,
Psubusw,
Ptest,
Punpckhbw,
Punpcklbw,
Pxor,
Rcpss,
Roundps,
Roundpd,
Roundss,
Roundsd,
Rsqrtss,
@ -585,6 +674,7 @@ impl SseOpcode {
| SseOpcode::Cmpsd
| SseOpcode::Comisd
| SseOpcode::Cvtdq2ps
| SseOpcode::Cvtdq2pd
| SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd
@ -608,7 +698,9 @@ impl SseOpcode {
| SseOpcode::Mulpd
| SseOpcode::Mulsd
| SseOpcode::Orpd
| SseOpcode::Packssdw
| SseOpcode::Packsswb
| SseOpcode::Packuswb
| SseOpcode::Paddb
| SseOpcode::Paddd
| SseOpcode::Paddq
@ -629,6 +721,7 @@ impl SseOpcode {
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw
| SseOpcode::Pinsrw
| SseOpcode::Pmaddwd
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
| SseOpcode::Pminsw
@ -654,6 +747,8 @@ impl SseOpcode {
| SseOpcode::Psubsw
| SseOpcode::Psubusb
| SseOpcode::Psubusw
| SseOpcode::Punpckhbw
| SseOpcode::Punpcklbw
| SseOpcode::Pxor
| SseOpcode::Sqrtpd
| SseOpcode::Sqrtsd
@ -662,9 +757,15 @@ impl SseOpcode {
| SseOpcode::Ucomisd
| SseOpcode::Xorpd => SSE2,
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
SseOpcode::Pabsb
| SseOpcode::Pabsw
| SseOpcode::Pabsd
| SseOpcode::Palignr
| SseOpcode::Pshufb => SSSE3,
SseOpcode::Insertps
SseOpcode::Blendvpd
| SseOpcode::Insertps
| SseOpcode::Packusdw
| SseOpcode::Pcmpeqq
| SseOpcode::Pextrb
| SseOpcode::Pextrd
@ -678,8 +779,22 @@ impl SseOpcode {
| SseOpcode::Pminsd
| SseOpcode::Pminuw
| SseOpcode::Pminud
| SseOpcode::Pmovsxbd
| SseOpcode::Pmovsxbw
| SseOpcode::Pmovsxbq
| SseOpcode::Pmovsxwd
| SseOpcode::Pmovsxwq
| SseOpcode::Pmovsxdq
| SseOpcode::Pmovzxbd
| SseOpcode::Pmovzxbw
| SseOpcode::Pmovzxbq
| SseOpcode::Pmovzxwd
| SseOpcode::Pmovzxwq
| SseOpcode::Pmovzxdq
| SseOpcode::Pmulld
| SseOpcode::Ptest
| SseOpcode::Roundps
| SseOpcode::Roundpd
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
@ -707,6 +822,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Andps => "andps",
SseOpcode::Andnps => "andnps",
SseOpcode::Andnpd => "andnpd",
SseOpcode::Blendvpd => "blendvpd",
SseOpcode::Cmpps => "cmpps",
SseOpcode::Cmppd => "cmppd",
SseOpcode::Cmpss => "cmpss",
@ -714,6 +830,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd",
SseOpcode::Cvtdq2ps => "cvtdq2ps",
SseOpcode::Cvtdq2pd => "cvtdq2pd",
SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss",
@ -758,7 +875,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pabsb => "pabsb",
SseOpcode::Pabsw => "pabsw",
SseOpcode::Pabsd => "pabsd",
SseOpcode::Packssdw => "packssdw",
SseOpcode::Packsswb => "packsswb",
SseOpcode::Packusdw => "packusdw",
SseOpcode::Packuswb => "packuswb",
SseOpcode::Paddb => "paddb",
SseOpcode::Paddd => "paddd",
SseOpcode::Paddq => "paddq",
@ -767,6 +887,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddsw => "paddsw",
SseOpcode::Paddusb => "paddusb",
SseOpcode::Paddusw => "paddusw",
SseOpcode::Palignr => "palignr",
SseOpcode::Pand => "pand",
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb",
@ -785,6 +906,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd",
SseOpcode::Pmaddwd => "pmaddwd",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
SseOpcode::Pmaxsd => "pmaxsd",
@ -798,6 +920,18 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pminuw => "pminuw",
SseOpcode::Pminud => "pminud",
SseOpcode::Pmovmskb => "pmovmskb",
SseOpcode::Pmovsxbd => "pmovsxbd",
SseOpcode::Pmovsxbw => "pmovsxbw",
SseOpcode::Pmovsxbq => "pmovsxbq",
SseOpcode::Pmovsxwd => "pmovsxwd",
SseOpcode::Pmovsxwq => "pmovsxwq",
SseOpcode::Pmovsxdq => "pmovsxdq",
SseOpcode::Pmovzxbd => "pmovzxbd",
SseOpcode::Pmovzxbw => "pmovzxbw",
SseOpcode::Pmovzxbq => "pmovzxbq",
SseOpcode::Pmovzxwd => "pmovzxwd",
SseOpcode::Pmovzxwq => "pmovzxwq",
SseOpcode::Pmovzxdq => "pmovzxdq",
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",
@ -821,8 +955,12 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Psubusb => "psubusb",
SseOpcode::Psubusw => "psubusw",
SseOpcode::Ptest => "ptest",
SseOpcode::Punpckhbw => "punpckhbw",
SseOpcode::Punpcklbw => "punpcklbw",
SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundps => "roundps",
SseOpcode::Roundpd => "roundpd",
SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd",
SseOpcode::Rsqrtss => "rsqrtss",
@ -926,7 +1064,7 @@ impl fmt::Display for ExtMode {
}
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
#[derive(Clone)]
#[derive(Clone, Copy)]
pub enum ShiftKind {
ShiftLeft,
/// Inserts zeros in the most significant bits.
@ -1171,9 +1309,30 @@ impl From<FloatCC> for FcmpImm {
}
}
/// Encode the rounding modes used as part of the Rounding Control field.
/// Note: these rounding immediates only consider the rounding control field
/// (i.e. the rounding mode), which only takes up the first two bits when encoded.
/// However, the rounding immediate that this field helps make up also includes
/// bits 3 and 4, which define the rounding select and precision mask respectively.
/// Those two bits are not defined here and are implicitly set to zero when encoded.
pub(crate) enum RoundImm {
RoundNearest = 0x00,
RoundDown = 0x01,
RoundUp = 0x02,
RoundZero = 0x03,
}
impl RoundImm {
pub(crate) fn encode(self) -> u8 {
self as u8
}
}
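An illustrative use (not in the patch): only the low two bits of the 8-bit rounding immediate come from this field, and the higher control bits stay at their zero defaults:

let imm: u8 = RoundImm::RoundDown.encode();
assert_eq!(imm, 0x01);
// RoundNearest -> 0x00, RoundUp -> 0x02, RoundZero -> 0x03; higher bits stay 0.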
/// An operand's size in bits.
#[derive(Clone, Copy, PartialEq)]
pub enum OperandSize {
Size8,
Size16,
Size32,
Size64,
}
@ -1181,24 +1340,36 @@ pub enum OperandSize {
impl OperandSize {
pub(crate) fn from_bytes(num_bytes: u32) -> Self {
match num_bytes {
1 | 2 | 4 => OperandSize::Size32,
1 => OperandSize::Size8,
2 => OperandSize::Size16,
4 => OperandSize::Size32,
8 => OperandSize::Size64,
_ => unreachable!(),
_ => unreachable!("Invalid OperandSize: {}", num_bytes),
}
}
// Computes the OperandSize for a given type.
// For vectors, the OperandSize of the lanes is returned.
pub(crate) fn from_ty(ty: Type) -> Self {
Self::from_bytes(ty.lane_type().bytes())
}
// Check that the value of self is one of the allowed sizes.
pub(crate) fn is_one_of(&self, sizes: &[Self]) -> bool {
sizes.iter().any(|val| *self == *val)
}
pub(crate) fn to_bytes(&self) -> u8 {
match self {
Self::Size8 => 1,
Self::Size16 => 2,
Self::Size32 => 4,
Self::Size64 => 8,
}
}
pub(crate) fn to_bits(&self) -> u8 {
match self {
Self::Size32 => 32,
Self::Size64 => 64,
}
self.to_bytes() * 8
}
}
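A few illustrative checks of the new helpers (a sketch, assuming the `Type` constants from `crate::ir::types`); note that for vectors the lane size is what matters:

use crate::ir::types;
assert!(OperandSize::from_ty(types::I8X16) == OperandSize::Size8); // lane type is I8
assert!(OperandSize::from_ty(types::F64) == OperandSize::Size64);
assert_eq!(OperandSize::Size16.to_bytes(), 2);
assert_eq!(OperandSize::Size16.to_bits(), 16);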

The diff between the files is not shown because of its large size.

The diff between the files is not shown because of its large size.

View file

@ -0,0 +1 @@
pub mod rex;

View file

@ -0,0 +1,453 @@
//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
//!
//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
//! means "hardware register encoding number".
use crate::{
ir::TrapCode,
isa::x64::inst::{
args::{Amode, OperandSize},
regs, EmitInfo, EmitState, Inst, LabelUse,
},
machinst::{MachBuffer, MachInstEmitInfo},
};
use regalloc::{Reg, RegClass};
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
let xs = (x as i32) as i64;
xs == ((xs << 56) >> 56)
}
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
let xs = x as i32;
xs == ((xs << 24) >> 24)
}
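A couple of illustrative cases for these predicates:

assert!(low8_will_sign_extend_to_32(0x7F)); // 127 fits in a sign-extended imm8
assert!(!low8_will_sign_extend_to_32(0x80)); // 128 does not
assert!(low8_will_sign_extend_to_32(0xFFFF_FFFF)); // -1 fits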
#[inline(always)]
pub(crate) fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
debug_assert!(m0d < 4);
debug_assert!(enc_reg_g < 8);
debug_assert!(rm_e < 8);
((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
}
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
debug_assert!(shift < 4);
debug_assert!(enc_index < 8);
debug_assert!(enc_base < 8);
((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
}
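A worked encoding as a sketch: the register-direct form of `add rbx, rax` (opcode `01 /r`, reg = rax = 0, r/m = rbx = 3) uses mod = 0b11, and a SIB byte for an address like `[rax + rcx*4]` uses scale = 2:

assert_eq!(encode_modrm(0b11, /* rax */ 0, /* rbx */ 3), 0xC3);
assert_eq!(encode_sib(/* scale 4 => */ 2, /* rcx */ 1, /* rax */ 0), 0x88);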
/// Get the encoding number of a GPR.
#[inline(always)]
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert_eq!(reg.get_class(), RegClass::I64);
reg.get_hw_encoding()
}
/// Get the encoding number of any register.
#[inline(always)]
pub(crate) fn reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
reg.get_hw_encoding()
}
/// A small bit field to record a REX prefix specification:
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct RexFlags(u8);
impl RexFlags {
/// The default: the REX.W bit is set and the prefix is emitted only if needed.
#[inline(always)]
pub(crate) fn set_w() -> Self {
Self(0)
}
/// Creates a new RexFlags for which the REX.W bit will be cleared.
#[inline(always)]
pub(crate) fn clear_w() -> Self {
Self(1)
}
#[inline(always)]
pub(crate) fn always_emit(&mut self) -> &mut Self {
self.0 = self.0 | 2;
self
}
#[inline(always)]
pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
let enc_reg = int_reg_enc(reg);
if enc_reg >= 4 && enc_reg <= 7 {
self.always_emit();
}
self
}
#[inline(always)]
pub(crate) fn must_clear_w(&self) -> bool {
(self.0 & 1) != 0
}
#[inline(always)]
pub(crate) fn must_always_emit(&self) -> bool {
(self.0 & 2) != 0
}
#[inline(always)]
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
let w = if self.must_clear_w() { 0 } else { 1 };
let r = (enc_g >> 3) & 1;
let x = 0;
let b = (enc_e >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || self.must_always_emit() {
sink.put1(rex);
}
}
#[inline(always)]
pub fn emit_three_op(
&self,
sink: &mut MachBuffer<Inst>,
enc_g: u8,
enc_index: u8,
enc_base: u8,
) {
let w = if self.must_clear_w() { 0 } else { 1 };
let r = (enc_g >> 3) & 1;
let x = (enc_index >> 3) & 1;
let b = (enc_base >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || self.must_always_emit() {
sink.put1(rex);
}
}
}
/// Generate the proper Rex flags for the given operand size.
impl From<OperandSize> for RexFlags {
fn from(size: OperandSize) -> Self {
match size {
OperandSize::Size64 => RexFlags::set_w(),
_ => RexFlags::clear_w(),
}
}
}
/// Generate Rex flags for an OperandSize/register tuple.
impl From<(OperandSize, Reg)> for RexFlags {
fn from((size, reg): (OperandSize, Reg)) -> Self {
let mut rex = RexFlags::from(size);
if size == OperandSize::Size8 {
rex.always_emit_if_8bit_needed(reg);
}
rex
}
}
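Concretely, a sketch of the byte arithmetic performed by `emit_two_op` above: for 64-bit `add rbx, rax` the fields are W=1, R=0 (rax), X=0, B=0 (rbx), giving the prefix 0x48 and the full instruction `48 01 C3`:

fn rex_byte_sketch(w: u8, enc_g: u8, enc_index: u8, enc_e: u8) -> u8 {
    0x40 | (w << 3) | (((enc_g >> 3) & 1) << 2) | (((enc_index >> 3) & 1) << 1) | ((enc_e >> 3) & 1)
}

assert_eq!(rex_byte_sketch(1, /* rax */ 0, 0, /* rbx */ 3), 0x48);
assert_eq!(rex_byte_sketch(1, /* r15 */ 15, 0, /* r12 */ 12), 0x4D);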
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
/// covers only the small set of possibilities that we actually need.
pub(crate) enum LegacyPrefixes {
/// No prefix bytes.
None,
/// Operand Size Override -- here, denoting "16-bit operation".
_66,
/// The Lock prefix.
_F0,
/// Operand size override and Lock.
_66F0,
/// REPNE, but no specific meaning here -- is just an opcode extension.
_F2,
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
_F3,
/// Operand size override and same effect as F3.
_66F3,
}
impl LegacyPrefixes {
#[inline(always)]
pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
match self {
LegacyPrefixes::_66 => sink.put1(0x66),
LegacyPrefixes::_F0 => sink.put1(0xF0),
LegacyPrefixes::_66F0 => {
// I don't think the order matters, but in any case, this is the same order that
// the GNU assembler uses.
sink.put1(0x66);
sink.put1(0xF0);
}
LegacyPrefixes::_F2 => sink.put1(0xF2),
LegacyPrefixes::_F3 => sink.put1(0xF3),
LegacyPrefixes::_66F3 => {
sink.put1(0x66);
sink.put1(0xF3);
}
LegacyPrefixes::None => (),
}
}
}
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
/// create and emit:
/// - first the legacy prefixes, if any
/// - then the REX prefix, if needed
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
/// - then the MOD/RM byte,
/// - then optionally, a SIB byte,
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
///
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
/// instructions will require their own emitter functions.
///
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
///
/// The opcodes are written in big-endian order for the convenience of callers. For example, if the opcode
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
/// 0xF3_0F_27 and `num_opcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
pub(crate) fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
mem_e: &Amode,
rex: RexFlags,
) {
// General comment for this function: the registers in `mem_e` must be
// 64-bit integer registers, because they are part of an address
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
let can_trap = mem_e.can_trap();
if can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
prefixes.emit(sink);
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_RBP
&& enc_e != regs::ENC_R12
&& enc_e != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
sink.put1(encode_modrm(0, enc_g & 7, 4));
sink.put1(0x24);
} else if low8_will_sign_extend_to_32(*simm32)
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_R12
{
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
sink.put4(*simm32);
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
&& low8_will_sign_extend_to_32(*simm32)
{
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
// No test case yet exercises the RSP variant of this arm.
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("ImmReg");
}
}
Amode::ImmRegRegShift {
simm32,
base: reg_base,
index: reg_index,
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
// The rex byte.
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates.
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put1(*simm32 as u8);
} else if enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put4(*simm32);
} else {
panic!("ImmRegRegShift");
}
}
Amode::RipRelative { ref target } => {
// First, the REX byte, with REX.B = 0.
rex.emit_two_op(sink, enc_g, 0);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// RIP-relative is mod=00, rm=101.
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
sink.put4(0);
}
}
}
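// Illustrative sketch (an editor's addition, not part of the crate): the
// big-endian `opcodes` packing convention documented above, demonstrated
// standalone with a plain Vec<u8> standing in for MachBuffer.
#[cfg(test)]
mod opcode_packing_sketch {
    fn emit_opcodes(sink: &mut Vec<u8>, opcodes: u32, mut num_opcodes: usize) {
        while num_opcodes > 0 {
            num_opcodes -= 1;
            // Pull bytes off the most significant end first.
            sink.push(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
        }
    }

    #[test]
    fn packs_f3_0f_27() {
        let mut sink = Vec::new();
        // F3 0F 27 is passed as `opcodes == 0xF3_0F_27`, `num_opcodes == 3`.
        emit_opcodes(&mut sink, 0xF3_0F_27, 3);
        assert_eq!(sink, vec![0xF3, 0x0F, 0x27]);
    }
}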
/// This is the core 'emit' function for instructions that do not reference memory.
///
/// This is conceptually the same as `emit_std_enc_mem` above, except it is for the case where
/// the E operand is a register rather than memory. Hence it is much simpler.
pub(crate) fn emit_std_enc_enc(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
enc_e: u8,
rex: RexFlags,
) {
// EncG and EncE can be derived from registers of any class, and they
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
// The legacy prefixes.
prefixes.emit(sink);
// The rex byte.
rex.emit_two_op(sink, enc_g, enc_e);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm byte. The instruction we're generating doesn't access
// memory, so there is no SIB byte or immediate -- we're done.
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
}
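// Illustrative sketch (an editor's addition): the standard x86-64 byte
// layouts that the `encode_modrm` / `encode_sib` calls above are assumed to
// produce. The helper bodies here are stand-ins, not the crate's definitions.
#[cfg(test)]
mod modrm_sib_sketch {
    // ModRM: mod[7:6] | reg[5:3] | rm[2:0]
    fn encode_modrm(m0d: u8, reg_g: u8, rm_e: u8) -> u8 {
        ((m0d & 3) << 6) | ((reg_g & 7) << 3) | (rm_e & 7)
    }

    // SIB: scale[7:6] | index[5:3] | base[2:0]
    fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
        ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
    }

    #[test]
    fn register_direct_and_sib() {
        // mod == 3 is register-direct, as in emit_std_enc_enc above:
        // reg rax (0) operating on reg rcx (1) packs to 0xC1.
        assert_eq!(encode_modrm(3, 0, 1), 0xC1);
        // Scale-by-8 (shift == 3), index rax (0), base rcx (1) also packs to 0xC1.
        assert_eq!(encode_sib(3, 0, 1), 0xC1);
    }
}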
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
pub(crate) fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
mem_e: &Amode,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
emit_std_enc_mem(
sink,
state,
info,
prefixes,
opcodes,
num_opcodes,
enc_g,
mem_e,
rex,
);
}
pub(crate) fn emit_std_reg_reg(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
reg_e: Reg,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
let enc_e = reg_enc(reg_e);
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
}
/// Write a suitable number of bytes of the immediate to the sink. Note that the 8-byte case
/// emits only four bytes: x86-64 instructions carry at most a 32-bit immediate, which the CPU
/// sign-extends to 64 bits.
pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
1 => sink.put1(simm32 as u8),
_ => unreachable!(),
}
}
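// Illustrative note (an editor's addition): a standalone check of the
// sign-extension equivalence that lets the size-8 arm above emit four bytes.
#[cfg(test)]
mod imm32_sign_extension_sketch {
    #[test]
    fn imm32_widens_to_imm64() {
        let simm32: u32 = 0xFFFF_FFFF; // the four bytes that would be emitted
        let widened = simm32 as i32 as i64; // what the CPU computes from them
        assert_eq!(widened, -1i64); // same value as a full 64-bit -1
    }
}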

Diff not shown because of its large size.

View file

@ -1,14 +1,20 @@
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//! These are ordered by sequence number, as required in the Universe.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
//! aware of the cost of clobber-save/restore code.
//!
//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
//! us to have to save them (as we are the caller) frequently. However, the register allocator
//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
//! caller-saved (i.e., callee-clobbered) registers.
//!
//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
//! ourselves have to save) and this is balanced against the RA's pressure in the other direction
//! at callsites.
use crate::settings;
use alloc::vec::Vec;
@ -17,11 +23,20 @@ use regalloc::{
};
use std::string::String;
// Hardware encodings for a few registers.
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
pub const ENC_RAX: u8 = 0;
pub const ENC_RCX: u8 = 1;
pub const ENC_RDX: u8 = 2;
pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_RSI: u8 = 6;
pub const ENC_RDI: u8 = 7;
pub const ENC_R8: u8 = 8;
pub const ENC_R9: u8 = 9;
pub const ENC_R10: u8 = 10;
pub const ENC_R11: u8 = 11;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
@ -31,44 +46,44 @@ fn gpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::I64, enc, index)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 16)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 17)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 18)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 19)
}
pub(crate) fn rsi() -> Reg {
gpr(6, 20)
gpr(ENC_RSI, 16)
}
pub(crate) fn rdi() -> Reg {
gpr(7, 21)
gpr(ENC_RDI, 17)
}
pub(crate) fn rax() -> Reg {
gpr(0, 22)
gpr(ENC_RAX, 18)
}
pub(crate) fn rcx() -> Reg {
gpr(1, 23)
gpr(ENC_RCX, 19)
}
pub(crate) fn rdx() -> Reg {
gpr(2, 24)
gpr(ENC_RDX, 20)
}
pub(crate) fn r8() -> Reg {
gpr(8, 25)
gpr(ENC_R8, 21)
}
pub(crate) fn r9() -> Reg {
gpr(9, 26)
gpr(ENC_R9, 22)
}
pub(crate) fn r10() -> Reg {
gpr(10, 27)
gpr(ENC_R10, 23)
}
pub(crate) fn r11() -> Reg {
gpr(11, 28)
gpr(ENC_R11, 24)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 25)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 26)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 27)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 28)
}
pub(crate) fn r15() -> Reg {
@ -176,13 +191,6 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
// Integer regs.
let first_gpr = regs.len();
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Caller-saved, in the SystemV x86_64 ABI.
regs.push((rsi().to_real_reg(), "%rsi".into()));
regs.push((rdi().to_real_reg(), "%rdi".into()));
@ -194,6 +202,13 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
regs.push((r10().to_real_reg(), "%r10".into()));
regs.push((r11().to_real_reg(), "%r11".into()));
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Other regs, not available to the allocator.
debug_assert_eq!(r15(), pinned_reg());
let allocable = if use_pinned_reg {

View file

@ -1,125 +1,5 @@
use crate::isa::unwind::input::UnwindInfo;
use crate::isa::x64::inst::{
args::{AluRmiROpcode, Amode, RegMemImm, SyntheticAmode},
regs, Inst,
};
use crate::machinst::{UnwindInfoContext, UnwindInfoGenerator};
use crate::result::CodegenResult;
use alloc::vec::Vec;
use regalloc::Reg;
#[cfg(feature = "unwind")]
pub(crate) mod systemv;
pub struct X64UnwindInfo;
impl UnwindInfoGenerator<Inst> for X64UnwindInfo {
fn create_unwind_info(
context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
use crate::isa::unwind::input::{self, UnwindCode};
let mut codes = Vec::new();
const WORD_SIZE: u8 = 8;
for i in context.prologue.clone() {
let i = i as usize;
let inst = &context.insts[i];
let offset = context.insts_layout[i];
match inst {
Inst::Push64 {
src: RegMemImm::Reg { reg },
} => {
codes.push((
offset,
UnwindCode::StackAlloc {
size: WORD_SIZE.into(),
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *reg,
stack_offset: 0,
},
));
}
Inst::MovRR { src, dst, .. } => {
if *src == regs::rsp() {
codes.push((offset, UnwindCode::SetFramePointer { reg: dst.to_reg() }));
}
}
Inst::AluRmiR {
is_64: true,
op: AluRmiROpcode::Sub,
src: RegMemImm::Imm { simm32 },
dst,
..
} if dst.to_reg() == regs::rsp() => {
let imm = *simm32;
codes.push((offset, UnwindCode::StackAlloc { size: imm }));
}
Inst::MovRM {
src,
dst: SyntheticAmode::Real(Amode::ImmReg { simm32, base, .. }),
..
} if *base == regs::rsp() => {
// `mov reg, imm(rsp)`
let imm = *simm32;
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *src,
stack_offset: imm,
},
));
}
Inst::AluRmiR {
is_64: true,
op: AluRmiROpcode::Add,
src: RegMemImm::Imm { simm32 },
dst,
..
} if dst.to_reg() == regs::rsp() => {
let imm = *simm32;
codes.push((offset, UnwindCode::StackDealloc { size: imm }));
}
_ => {}
}
}
let last_epilogue_end = context.len;
let epilogues_unwind_codes = context
.epilogues
.iter()
.map(|epilogue| {
// TODO add logic to process epilogue instruction instead of
// returning empty array.
let end = epilogue.end as usize - 1;
let end_offset = context.insts_layout[end];
if end_offset == last_epilogue_end {
// Do not remember/restore for very last epilogue.
return vec![];
}
let start = epilogue.start as usize;
let offset = context.insts_layout[start];
vec![
(offset, UnwindCode::RememberState),
// TODO epilogue instructions
(end_offset, UnwindCode::RestoreState),
]
})
.collect();
let prologue_size = context.insts_layout[context.prologue.end as usize];
Ok(Some(input::UnwindInfo {
prologue_size,
prologue_unwind_codes: codes,
epilogues_unwind_codes,
function_size: context.len,
word_size: WORD_SIZE,
initial_sp_offset: WORD_SIZE,
}))
}
}
#[cfg(feature = "unwind")]
pub(crate) mod winx64;

View file

@ -1,8 +1,6 @@
//! Unwind information for System V ABI (x86-64).
use crate::isa::unwind::input;
use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
use crate::result::CodegenResult;
use crate::isa::unwind::systemv::RegisterMappingError;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
use regalloc::{Reg, RegClass};
@ -82,21 +80,18 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
}
}
pub(crate) fn create_unwind_info(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Option<UnwindInfo>> {
struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
X86_64::RSP.0
}
}
let map = RegisterMapper;
pub(crate) struct RegisterMapper;
Ok(Some(UnwindInfo::build(unwind, &map)?))
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
X86_64::RSP.0
}
fn fp(&self) -> u16 {
X86_64::RBP.0
}
}
#[cfg(test)]
@ -114,6 +109,7 @@ mod tests {
use target_lexicon::triple;
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_simple_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
@ -136,7 +132,7 @@ mod tests {
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 13, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 17, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
@ -156,6 +152,7 @@ mod tests {
}
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_multi_return_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
@ -175,7 +172,7 @@ mod tests {
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }");
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
}
fn create_multi_return_function(call_conv: CallConv) -> Function {

View file

@ -0,0 +1,16 @@
//! Unwind information for Windows x64 ABI.
use regalloc::{Reg, RegClass};
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::winx64::RegisterMapper<Reg> for RegisterMapper {
fn map(reg: Reg) -> crate::isa::unwind::winx64::MappedRegister {
use crate::isa::unwind::winx64::MappedRegister;
match reg.get_class() {
RegClass::I64 => MappedRegister::Int(reg.get_hw_encoding()),
RegClass::V128 => MappedRegister::Xmm(reg.get_hw_encoding()),
_ => unreachable!(),
}
}
}

Diff not shown because of its large size.

View file

@ -9,10 +9,14 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::boxed::Box;
use regalloc::{PrettyPrint, RealRegUniverse};
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
mod abi;
mod inst;
mod lower;
@ -59,7 +63,8 @@ impl MachBackend for X64Backend {
let buffer = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size();
let unwind_info = vcode.unwind_info()?;
let value_labels_ranges = vcode.value_labels_ranges();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
@ -71,7 +76,8 @@ impl MachBackend for X64Backend {
buffer,
frame_size,
disasm,
unwind_info,
value_labels_ranges,
stackslot_offsets,
})
}
@ -79,6 +85,15 @@ impl MachBackend for X64Backend {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.x64_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.x64_flags.hash(&mut hasher);
}
fn name(&self) -> &'static str {
"x64"
}
@ -92,15 +107,15 @@ impl MachBackend for X64Backend {
}
fn unsigned_add_overflow_condition(&self) -> IntCC {
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// overflow of an add.
IntCC::UnsignedGreaterThanOrEqual
// Unsigned `<`; this corresponds to the carry flag set on x86, which
// indicates an add has overflowed.
IntCC::UnsignedLessThan
}
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// underflow of a subtract (carry is borrow for subtract).
IntCC::UnsignedGreaterThanOrEqual
// unsigned `<`; this corresponds to the carry flag set on x86, which
// indicates a sub has underflowed (carry is borrow for subtract).
IntCC::UnsignedLessThan
}
#[cfg(feature = "unwind")]
@ -111,14 +126,22 @@ impl MachBackend for X64Backend {
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
Ok(match (result.unwind_info.as_ref(), kind) {
(Some(info), UnwindInfoKind::SystemV) => {
inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
}
(Some(_info), UnwindInfoKind::Windows) => {
//TODO inst::unwind::winx64::create_unwind_info(info.clone())?.map(|u| UnwindInfo::WindowsX64(u))
None
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data.len(),
&mapper,
)?,
))
}
UnwindInfoKind::Windows => Some(UnwindInfo::WindowsX64(
crate::isa::unwind::winx64::create_unwind_info_from_insts::<
self::inst::unwind::winx64::RegisterMapper,
>(&result.buffer.unwind_info[..])?,
)),
_ => None,
})
}
@ -127,6 +150,11 @@ impl MachBackend for X64Backend {
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(inst::unwind::systemv::create_cie())
}
#[cfg(feature = "unwind")]
fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
}
}
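// Illustrative sketch (an editor's addition): why unsigned `<` captures the
// carry flag, as the overflow-condition comments above describe. After a
// wrapping add, carry-out is set exactly when the truncated sum is less than
// an operand; after a sub, borrow is set exactly when a < b.
#[cfg(test)]
mod carry_flag_sketch {
    fn add_carried(a: u64, b: u64) -> bool {
        a.wrapping_add(b) < a // comparing against `b` works equally well
    }

    fn sub_borrowed(a: u64, b: u64) -> bool {
        a < b
    }

    #[test]
    fn carry_and_borrow() {
        assert!(!add_carried(1, 2));
        assert!(add_carried(u64::MAX, 1)); // wraps to 0, which is < u64::MAX
        assert!(!sub_borrowed(2, 1));
        assert!(sub_borrowed(1, 2)); // 1 - 2 underflows
    }
}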
/// Create a new `isa::Builder`.

View file

@ -1,6 +1,6 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a

View file

@ -144,8 +144,13 @@ impl ArgAssigner for Args {
return ValueConversion::VectorSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
// Small integers are extended to the size of a pointer register, but
// only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI
// does, but our other supported ABIs on x86 do not.
if ty.is_int()
&& ty.bits() < u16::from(self.pointer_bits)
&& self.call_conv.extends_baldrdash()
{
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
@ -507,6 +512,7 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> Codege
}
CallConv::Probestack => unimplemented!("probestack calling convention"),
CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
CallConv::AppleAarch64 => unreachable!(),
}
}

Some files were not shown because too many files changed.