Mirror of https://github.com/mozilla/gecko-dev.git

Bug 1633721, part 1 of 2: Bump Cranelift to revision 5e0268a542f612fee36d0256ed1f6a0e18dc02b3. r=bbouvier

This patch updates the vendored version of Cranelift, pulling in the reference-types support recently merged in Cranelift's PR bytecodealliance/wasmtime#1852. Usage of this update to support reftypes in SpiderMonkey on aarch64 is added in the subsequent commit.

Differential Revision: https://phabricator.services.mozilla.com/D83582

Parent: 22e60f1257
Commit: c07df47355
@@ -65,7 +65,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
 [source."https://github.com/bytecodealliance/wasmtime"]
 git = "https://github.com/bytecodealliance/wasmtime"
 replace-with = "vendored-sources"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [source."https://github.com/badboy/failure"]
 git = "https://github.com/badboy/failure"
@@ -733,22 +733,22 @@ dependencies = [
 
 [[package]]
 name = "cranelift-bforest"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
 ]
 
 [[package]]
 name = "cranelift-codegen"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "byteorder",
  "cranelift-bforest",
  "cranelift-codegen-meta",
  "cranelift-codegen-shared",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
  "log",
  "regalloc",
  "smallvec",
@@ -758,17 +758,17 @@ dependencies = [
 
 [[package]]
 name = "cranelift-codegen-meta"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen-shared",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
 ]
 
 [[package]]
 name = "cranelift-codegen-shared"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [[package]]
 name = "cranelift-entity"
@@ -777,13 +777,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9
 
 [[package]]
 name = "cranelift-entity"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [[package]]
 name = "cranelift-frontend"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen",
  "log",
@@ -793,15 +793,15 @@ dependencies = [
 
 [[package]]
 name = "cranelift-wasm"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
  "cranelift-frontend",
  "log",
  "thiserror",
- "wasmparser 0.58.0",
+ "wasmparser 0.59.0",
 ]
 
 [[package]]
@@ -3951,9 +3951,9 @@ dependencies = [
 
 [[package]]
 name = "regalloc"
-version = "0.0.26"
+version = "0.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95"
+checksum = "3598bed0895fe0f72a9e0b00ef9e3a3c8af978a8401b2f2046dec5927de6364a"
 dependencies = [
  "log",
  "rustc-hash",
@@ -5382,9 +5382,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
 
 [[package]]
 name = "wasmparser"
-version = "0.58.0"
+version = "0.59.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"
+checksum = "a950e6a618f62147fd514ff445b2a0b53120d382751960797f85f058c7eda9b9"
 
 [[package]]
 name = "wast"
@@ -74,8 +74,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f
 
 [patch.crates-io.cranelift-codegen]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [patch.crates-io.cranelift-wasm]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
@@ -13,8 +13,8 @@ name = "baldrdash"
 # cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
 # you should update the following $TOP_LEVEL/Cargo.toml file: look for the
 # revision (rev) hashes of both cranelift dependencies (codegen and wasm).
-cranelift-codegen = { version = "0.65.0", default-features = false }
-cranelift-wasm = { version = "0.65.0" }
+cranelift-codegen = { version = "0.66.0", default-features = false }
+cranelift-wasm = { version = "0.66.0" }
 log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
 env_logger = "0.6"
 smallvec = "1.0"
@@ -1 +1 @@
-{"files":{"Cargo.toml":"bbdc795c9e6a82fd6555b6bb1a01d2c4d9439278e6fd801225205be69c73a2d7","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
+{"files":{"Cargo.toml":"dbbc3b62d88aec50ed9e05a6306e3aa4c5d12bf473dff48cf020843a5db31a85","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-bforest"
-version = "0.65.0"
+version = "0.66.0"
 description = "A forest of B+-trees"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://docs.rs/cranelift-bforest"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
 edition = "2018"
 
 [dependencies]
-cranelift-entity = { path = "../entity", version = "0.65.0", default-features = false }
+cranelift-entity = { path = "../entity", version = "0.66.0", default-features = false }
 
 [badges]
 maintenance = { status = "experimental" }
@@ -1 +1 @@
-{"files":{"Cargo.toml":"7c01a301a32e60cd9b0edd66f4cf8700e5de1d31607437ea756d4f8b0ae29a54","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"2b3c5105e32bce932d2628963cc5c853207e37204a6aec38caace60e52870bbe","src/isa/x86/instructions.rs":"1aee81c8bc0215fa1cad83e97a0915b24521ae61d503cd727a2406a25dd60f29","src/isa/x86/legalize.rs":"ddc834ae8f4a06ca8e3fccf7aef6a097163a2f8d258a7cbc3cc6a8b93c9c0413","src/isa/x86/mod.rs":"2b84474c2b0e272c1ebe32530c57f6b11133127c286c8f82c5ae5b6486386238","src/isa/x86/opcodes.rs":"79d42b71f78119f4ca1dc4fc90bc9efb04c6fc526e01cbe79368aa59f117266a","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e12b1a8","src/isa/x86/settings.rs":"2d3e09ca34638e19621aef2492ca6943b105e6add830bd91bddbdc85277cb680","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"5ffa26a91b344fb7014a34e0d97b4df90d604a5bd49a49a75c262591deb8e6c4","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"7800f51d97a95d572310f6c80ded59c1c84cf3ba06f9425f4205f88ac46b4e98","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
+{"files":{"Cargo.toml":"d01629d478557c181b999c1722b6284435f45f04957d7cb55735b9605136a23e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"a0f5212fa593caf66371f5ee4b15e501939a9407c4663bff6b3ba356b11ca1b4","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"5ae9e5453c3aa8b12a37e5579b602162fa9e153b444e89bb89342614b6a5ed13","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"75fa07c819b467857d1f4231123e16e519312c9a58f5e7b9c633b0768fd88b42","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"186c688dd8ac773f2b2c4c1f1cbdb7a66ca13a8ed90c03f87dfe7fdaa12c15b3","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"745ef09f4927b5334d68155fa047910ef96311feef7ec20964bb033c3419cd3c","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e12b1a8","src/isa/x86/settings.rs":"47a5e9fb3b7917cfe817d56dcc77c0470545e451e0f38a875af0531fbd9b6a58","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38e65efb654dd39e9929b8506ba94d6214d996d727815829b659fd1a3b73bdce","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"7800f51d97a95d572310f6c80ded59c1c84cf3ba06f9425f4205f88ac46b4e98","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 name = "cranelift-codegen-meta"
 authors = ["The Cranelift Project Developers"]
-version = "0.65.0"
+version = "0.66.0"
 description = "Metaprogram for cranelift-codegen code generator library"
 license = "Apache-2.0 WITH LLVM-exception"
 repository = "https://github.com/bytecodealliance/wasmtime"
@@ -12,8 +12,8 @@ edition = "2018"
 rustdoc-args = [ "--document-private-items" ]
 
 [dependencies]
-cranelift-codegen-shared = { path = "../shared", version = "0.65.0" }
-cranelift-entity = { path = "../../entity", version = "0.65.0" }
+cranelift-codegen-shared = { path = "../shared", version = "0.66.0" }
+cranelift-entity = { path = "../../entity", version = "0.66.0" }
 
 [badges]
 maintenance = { status = "experimental" }
@@ -62,7 +62,7 @@ impl InstructionGroup {
         self.instructions
             .iter()
             .find(|inst| inst.name == name)
-            .unwrap_or_else(|| panic!("unexisting instruction with name {}", name))
+            .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name))
     }
 }
 
@@ -598,7 +598,7 @@ fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionF
 
     assert_eq!(
         num_values, format.num_value_operands,
-        "inst {} doesnt' have as many value input operand as its format {} declares; you may need \
+        "inst {} doesn't have as many value input operands as its format {} declares; you may need \
          to use a different format.",
        inst_name, format.name
     );
@@ -211,6 +211,24 @@ impl TypeVar {
                     "can't double 256 lanes"
                 );
             }
+            DerivedFunc::MergeLanes => {
+                assert!(
+                    ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
+                    "can't double all integer types"
+                );
+                assert!(
+                    ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
+                    "can't double all float types"
+                );
+                assert!(
+                    ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
+                    "can't double all boolean types"
+                );
+                assert!(
+                    *ts.lanes.iter().min().unwrap() > 1,
+                    "can't halve a scalar type"
+                );
+            }
             DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
         }
 
@@ -248,6 +266,9 @@ impl TypeVar {
     pub fn split_lanes(&self) -> TypeVar {
         self.derived(DerivedFunc::SplitLanes)
     }
+    pub fn merge_lanes(&self) -> TypeVar {
+        self.derived(DerivedFunc::MergeLanes)
+    }
 
     /// Constrain the range of types this variable can assume to a subset of those in the typeset
     /// ts.
@@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc {
     HalfVector,
     DoubleVector,
     SplitLanes,
+    MergeLanes,
 }
 
 impl DerivedFunc {
@@ -367,6 +389,7 @@ impl DerivedFunc {
             DerivedFunc::HalfVector => "half_vector",
             DerivedFunc::DoubleVector => "double_vector",
             DerivedFunc::SplitLanes => "split_lanes",
+            DerivedFunc::MergeLanes => "merge_lanes",
         }
     }
 
@@ -377,6 +400,8 @@ impl DerivedFunc {
             DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
             DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
             DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
+            DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
+            DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
             _ => None,
         }
     }
@@ -462,6 +487,7 @@ impl TypeSet {
             DerivedFunc::HalfVector => self.half_vector(),
             DerivedFunc::DoubleVector => self.double_vector(),
             DerivedFunc::SplitLanes => self.half_width().double_vector(),
+            DerivedFunc::MergeLanes => self.double_width().half_vector(),
         }
     }
 
@@ -601,7 +627,8 @@ impl TypeSet {
             DerivedFunc::DoubleWidth => self.half_width(),
             DerivedFunc::HalfVector => self.double_vector(),
             DerivedFunc::DoubleVector => self.half_vector(),
-            DerivedFunc::SplitLanes => self.half_vector().double_width(),
+            DerivedFunc::SplitLanes => self.double_width().half_vector(),
+            DerivedFunc::MergeLanes => self.half_width().double_vector(),
         }
     }
 
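The image/preimage fix above is easier to see with a toy model of the lane algebra. The sketch below is illustrative only, not the meta crate's API: a shape is a (lane width, lane count) pair, total bit-width stays constant, and split_lanes/merge_lanes are mutual inverses, which is why the preimage of SplitLanes must be double_width() followed by half_vector() (the correction this hunk makes).

// Toy model of the split_lanes/merge_lanes type algebra (illustrative only).
#[derive(Clone, Copy, Debug, PartialEq)]
struct Shape {
    lane_bits: u32,
    lanes: u32,
}

impl Shape {
    /// Halve the lane width and double the lane count (total bits constant).
    fn split_lanes(self) -> Shape {
        Shape { lane_bits: self.lane_bits / 2, lanes: self.lanes * 2 }
    }
    /// Double the lane width and halve the lane count (total bits constant).
    fn merge_lanes(self) -> Shape {
        Shape { lane_bits: self.lane_bits * 2, lanes: self.lanes / 2 }
    }
}

fn main() {
    let i32x4 = Shape { lane_bits: 32, lanes: 4 };
    // i32x4 -> i16x8, and merging the result restores i32x4.
    assert_eq!(i32x4.split_lanes(), Shape { lane_bits: 16, lanes: 8 });
    assert_eq!(i32x4.split_lanes().merge_lanes(), i32x4);
    println!("ok");
}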
@@ -700,6 +700,7 @@ fn gen_isa(
 pub(crate) fn generate(
     isas: &[TargetIsa],
     transform_groups: &TransformGroups,
+    extra_legalization_groups: &[&'static str],
     filename_prefix: &str,
     out_dir: &str,
 ) -> Result<(), error::Error> {
@@ -711,8 +712,14 @@ pub(crate) fn generate(
         fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?;
     }
 
+    // Add extra legalization groups that were explicitly requested.
+    for group in extra_legalization_groups {
+        shared_group_names.insert(group);
+    }
+
+    // Generate shared legalize groups.
     let mut fmt = Formatter::new();
-    // Generate shared legalize groups.
     let mut type_sets = UniqueTable::new();
     let mut sorted_shared_group_names = Vec::from_iter(shared_group_names);
     sorted_shared_group_names.sort();
@@ -6,10 +6,10 @@ use std::fmt;
 mod arm32;
 mod arm64;
 mod riscv;
-mod x86;
+pub(crate) mod x86;
 
 /// Represents known ISA target.
-#[derive(Copy, Clone)]
+#[derive(PartialEq, Copy, Clone)]
 pub enum Isa {
     Riscv,
     X86,
@@ -1669,6 +1669,7 @@ fn define_simd(
     let ssub_sat = shared.by_name("ssub_sat");
     let store = shared.by_name("store");
     let store_complex = shared.by_name("store_complex");
+    let swiden_low = shared.by_name("swiden_low");
     let uadd_sat = shared.by_name("uadd_sat");
     let uload8x8 = shared.by_name("uload8x8");
     let uload8x8_complex = shared.by_name("uload8x8_complex");
@@ -1676,6 +1677,9 @@ fn define_simd(
     let uload16x4_complex = shared.by_name("uload16x4_complex");
     let uload32x2 = shared.by_name("uload32x2");
     let uload32x2_complex = shared.by_name("uload32x2_complex");
+    let snarrow = shared.by_name("snarrow");
+    let unarrow = shared.by_name("unarrow");
+    let uwiden_low = shared.by_name("uwiden_low");
     let ushr_imm = shared.by_name("ushr_imm");
     let usub_sat = shared.by_name("usub_sat");
     let vconst = shared.by_name("vconst");
@@ -1686,7 +1690,6 @@ fn define_simd(
     let x86_fmin = x86.by_name("x86_fmin");
     let x86_movlhps = x86.by_name("x86_movlhps");
     let x86_movsd = x86.by_name("x86_movsd");
-    let x86_packss = x86.by_name("x86_packss");
     let x86_pblendw = x86.by_name("x86_pblendw");
     let x86_pextr = x86.by_name("x86_pextr");
     let x86_pinsr = x86.by_name("x86_pinsr");
@@ -1696,6 +1699,7 @@ fn define_simd(
     let x86_pminu = x86.by_name("x86_pminu");
     let x86_pmullq = x86.by_name("x86_pmullq");
     let x86_pmuludq = x86.by_name("x86_pmuludq");
+    let x86_palignr = x86.by_name("x86_palignr");
     let x86_pshufb = x86.by_name("x86_pshufb");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_psll = x86.by_name("x86_psll");
@@ -1900,9 +1904,35 @@ fn define_simd(
             rec_fa.opcodes(low),
         );
     }
 
     // SIMD narrow/widen
     for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
-        let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
+        let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
     }
+    for (ty, opcodes, isap) in &[
+        (I16, &PACKUSWB[..], None),
+        (I32, &PACKUSDW[..], Some(use_sse41_simd)),
+    ] {
+        let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
+    }
+    for (ty, swiden_opcode, uwiden_opcode) in &[
+        (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
+        (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
+    ] {
+        let isap = Some(use_sse41_simd);
+        let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
+        let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
+    }
+    for ty in &[I8, I16, I32, I64] {
+        e.enc_both_inferred_maybe_isap(
+            x86_palignr.bind(vector(*ty, sse_vector_size)),
+            rec_fa_ib.opcodes(&PALIGNR[..]),
+            Some(use_ssse3_simd),
+        );
+    }
 
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
@@ -454,35 +454,6 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
-    let I16xN = &TypeVar::new(
-        "I16xN",
-        "A SIMD vector type containing integers 16-bits wide and up",
-        TypeSetBuilder::new()
-            .ints(16..32)
-            .simd_lanes(4..8)
-            .includes_scalars(false)
-            .build(),
-    );
-
-    let x = &Operand::new("x", I16xN);
-    let y = &Operand::new("y", I16xN);
-    let a = &Operand::new("a", &I16xN.split_lanes());
-
-    ig.push(
-        Inst::new(
-            "x86_packss",
-            r#"
-        Convert packed signed integers the lanes of ``x`` and ``y`` into half-width integers, using
-        signed saturation to handle overflows. For example, with notional i16x2 vectors, where
-        ``x = [x1, x0]`` and ``y = [y1, y0]``, this operation would result in
-        ``a = [y1', y0', x1', x0']`` (using the Intel manual's right-to-left lane ordering).
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
     let x = &Operand::new("x", FxN);
     let y = &Operand::new("y", FxN);
     let a = &Operand::new("a", FxN);
@@ -693,6 +664,21 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let c = &Operand::new("c", uimm8)
+        .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
+    ig.push(
+        Inst::new(
+            "x86_palignr",
+            r#"
+        Concatenate destination and source operands, extracting a byte-aligned result shifted to
+        the right by `c`.
+        "#,
+            &formats.ternary_imm8,
+        )
+        .operands_in(vec![x, y, c])
+        .operands_out(vec![a]),
+    );
+
     let i64_t = &TypeVar::new(
         "i64_t",
         "A scalar 64bit integer",
@@ -383,6 +383,7 @@ fn define_simd(
     let fcmp = insts.by_name("fcmp");
     let fcvt_from_uint = insts.by_name("fcvt_from_uint");
     let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
+    let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
     let fmax = insts.by_name("fmax");
     let fmin = insts.by_name("fmin");
     let fneg = insts.by_name("fneg");
@@ -405,14 +406,19 @@ fn define_simd(
     let uadd_sat = insts.by_name("uadd_sat");
     let umax = insts.by_name("umax");
     let umin = insts.by_name("umin");
+    let snarrow = insts.by_name("snarrow");
+    let swiden_high = insts.by_name("swiden_high");
+    let swiden_low = insts.by_name("swiden_low");
     let ushr_imm = insts.by_name("ushr_imm");
     let ushr = insts.by_name("ushr");
+    let uwiden_high = insts.by_name("uwiden_high");
+    let uwiden_low = insts.by_name("uwiden_low");
     let vconst = insts.by_name("vconst");
     let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
     let vselect = insts.by_name("vselect");
 
-    let x86_packss = x86_instructions.by_name("x86_packss");
+    let x86_palignr = x86_instructions.by_name("x86_palignr");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
     let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
     let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -575,7 +581,7 @@ fn define_simd(
             def!(g = raw_bitcast_i16x8_again(f)),
             def!(h = x86_psra(g, b)),
             // Re-pack the vector.
-            def!(z = x86_packss(e, h)),
+            def!(z = snarrow(e, h)),
         ],
     );
 }
@@ -785,6 +791,26 @@ fn define_simd(
     );
     }
 
+    // SIMD widen
+    for ty in &[I8, I16] {
+        let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = swiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = swiden_low(c)),
+            ],
+        );
+        let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = uwiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = uwiden_low(c)),
+            ],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
@@ -797,4 +823,5 @@ fn define_simd(
 
     narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
     narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
+    narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
 }
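The widening legalization added above rewrites swiden_high/uwiden_high as a PALIGNR byte rotation followed by the corresponding low-half widen. Below is a scalar Rust model of the signed i8x16 case; the helper names and lane values are my own illustration, not code from Cranelift.

// Model of PALIGNR(a, a, 8): with both operands equal, every byte moves down
// by 8 positions, i.e. a rotation of the 16-byte vector.
fn palignr_self_8(a: [i8; 16]) -> [i8; 16] {
    let mut out = [0i8; 16];
    for i in 0..16 {
        out[i] = a[(i + 8) % 16];
    }
    out
}

// swiden_low: sign-extend the low 8 lanes from i8 to i16.
fn swiden_low(a: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for i in 0..8 {
        out[i] = a[i] as i16;
    }
    out
}

// The legalized form: shift the high half down, then widen the low half.
fn swiden_high(a: [i8; 16]) -> [i16; 8] {
    swiden_low(palignr_self_8(a))
}

fn main() {
    let a: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, -8, -9, -10, -11, -12, -13, -14, -15];
    assert_eq!(swiden_high(a), [-8, -9, -10, -11, -12, -13, -14, -15]);
    println!("ok");
}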
@@ -14,7 +14,7 @@ mod legalize;
 mod opcodes;
 mod recipes;
 mod registers;
-mod settings;
+pub(crate) mod settings;
 
 pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     let settings = settings::define(&shared_defs.settings);
@@ -47,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
     x86_32.legalize_type(F32, x86_expand);
     x86_32.legalize_type(F64, x86_expand);
+    x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
     x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
     x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
 
@@ -60,6 +61,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
     x86_64.legalize_type(F32, x86_expand);
     x86_64.legalize_type(F64, x86_expand);
+    x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
     x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
     x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
 
@@ -314,7 +314,7 @@ pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
 /// xmm1 (SSSE3).
 pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
 
-/// Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
 /// integers in xmm1 using signed saturation (SSE2).
 pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
 
@@ -322,6 +322,14 @@ pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
 /// word integers in xmm1 using signed saturation (SSE2).
 pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
 
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
+/// integers in xmm1 using unsigned saturation (SSE2).
+pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
+
+/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed
+/// word integers in xmm1 using unsigned saturation (SSE4.1).
+pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
+
 /// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
 pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
 
@@ -346,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
 /// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
 pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
 
+/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
+/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
+pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
+
 /// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
 pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
 
@@ -465,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
 pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
 
 /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
 
 /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
@@ -477,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
 pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
 
 /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
 
 /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
@@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
 pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
     let mut settings = SettingGroupBuilder::new("x86");
 
-    settings.add_bool(
-        "use_new_backend",
-        "Whether to use the new codegen backend using the new isel",
-        false,
-    );
-
     // CPUID.01H:ECX
     let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
     let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
@@ -25,7 +25,11 @@ pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
 }
 
 /// Generates all the Rust source files used in Cranelift from the meta-language.
-pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
+pub fn generate(
+    old_backend_isas: &[isa::Isa],
+    new_backend_isas: &[isa::Isa],
+    out_dir: &str,
+) -> Result<(), error::Error> {
     // Create all the definitions:
     // - common definitions.
     let mut shared_defs = shared::define();
@@ -39,7 +43,7 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
     gen_types::generate("types.rs", &out_dir)?;
 
     // - per ISA definitions.
-    let isas = isa::define(isas, &mut shared_defs);
+    let target_isas = isa::define(old_backend_isas, &mut shared_defs);
 
     // At this point, all definitions are done.
     let all_formats = shared_defs.verify_instruction_formats();
@@ -53,9 +57,22 @@ pub fn generate(
         &out_dir,
     )?;
 
-    gen_legalizer::generate(&isas, &shared_defs.transform_groups, "legalize", &out_dir)?;
+    let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
+        // The new backend only requires the "expand" legalization group.
+        &["expand"]
+    } else {
+        &[]
+    };
 
-    for isa in isas {
+    gen_legalizer::generate(
+        &target_isas,
+        &shared_defs.transform_groups,
+        extra_legalization_groups,
+        "legalize",
+        &out_dir,
+    )?;
+
+    for isa in target_isas {
         gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
 
         gen_settings::generate(
@@ -80,5 +97,28 @@ pub fn generate(
     )?;
     }
 
+    for isa in new_backend_isas {
+        match isa {
+            isa::Isa::X86 => {
+                // If the old backend ISAs contained x86, this file has already been generated.
+                if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
+                    continue;
+                }
+
+                let settings = crate::isa::x86::settings::define(&shared_defs.settings);
+                gen_settings::generate(
+                    &settings,
+                    gen_settings::ParentGroup::Shared,
+                    "settings-x86.rs",
+                    &out_dir,
+                )?;
+            }
+            isa::Isa::Arm64 => {
+                // aarch64 doesn't have platform-specific settings.
+            }
+            isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
+        }
+    }
+
     Ok(())
 }
@@ -3883,6 +3883,126 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
     );
 
+    let I16or32xN = &TypeVar::new(
+        "I16or32xN",
+        "A SIMD vector type containing integer lanes 16 or 32 bits wide",
+        TypeSetBuilder::new()
+            .ints(16..32)
+            .simd_lanes(4..8)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", I16or32xN);
+    let y = &Operand::new("y", I16or32xN);
+    let a = &Operand::new("a", &I16or32xN.split_lanes());
+
+    ig.push(
+        Inst::new(
+            "snarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the signed maximum and minimum.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "unarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the unsigned maximum and minimum.
+
+        Note that all input lanes are considered signed: any negative lanes will overflow and be
+        replaced with the unsigned minimum, `0x00`.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let I8or16xN = &TypeVar::new(
+        "I8or16xN",
+        "A SIMD vector type containing integer lanes 8 or 16 bits wide.",
+        TypeSetBuilder::new()
+            .ints(8..16)
+            .simd_lanes(8..16)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", I8or16xN);
+    let a = &Operand::new("a", &I8or16xN.merge_lanes());
+
+    ig.push(
+        Inst::new(
+            "swiden_low",
+            r#"
+        Widen the low lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "swiden_high",
+            r#"
+        Widen the high lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_low",
+            r#"
+        Widen the low lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_high",
+            r#"
+        Widen the high lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",
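Per lane, the saturating behavior described in the new snarrow/unarrow docstrings reduces to a clamp before truncation. A minimal scalar sketch for i32 -> i16 lanes (my own illustration, not code from this diff):

// snarrow: saturate to the signed i16 range, then truncate.
fn snarrow_lane(x: i32) -> i16 {
    x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
}

// unarrow: inputs are signed, but saturation is to the unsigned u16 range,
// so negative lanes clamp to 0.
fn unarrow_lane(x: i32) -> u16 {
    x.clamp(0, u16::MAX as i32) as u16
}

fn main() {
    assert_eq!(snarrow_lane(70_000), i16::MAX);  // overflow -> signed max
    assert_eq!(snarrow_lane(-70_000), i16::MIN); // underflow -> signed min
    assert_eq!(unarrow_lane(-5), 0);             // negative -> unsigned minimum 0x00
    assert_eq!(unarrow_lane(70_000), u16::MAX);  // overflow -> unsigned max
    println!("ok");
}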
@@ -1 +1 @@
-{"files":{"Cargo.toml":"add374d0c310a5bdcd081ea5c9b87f3cd99e78fd9f94e1318b386da6a6d60c08","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
+{"files":{"Cargo.toml":"f091891e7b42864e1ef40c5c30724d785403727692ae66b623888367c329efcd","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-codegen-shared"
-version = "0.65.0"
+version = "0.66.0"
 description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
 license = "Apache-2.0 WITH LLVM-exception"
 repository = "https://github.com/bytecodealliance/wasmtime"
File diffs hidden because one or more lines are too long.
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-codegen"
-version = "0.65.0"
+version = "0.66.0"
 description = "Low-level code generator library"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://docs.rs/cranelift-codegen"
@@ -13,9 +13,9 @@ build = "build.rs"
 edition = "2018"
 
 [dependencies]
-cranelift-codegen-shared = { path = "./shared", version = "0.65.0" }
-cranelift-entity = { path = "../entity", version = "0.65.0" }
-cranelift-bforest = { path = "../bforest", version = "0.65.0" }
+cranelift-codegen-shared = { path = "./shared", version = "0.66.0" }
+cranelift-entity = { path = "../entity", version = "0.66.0" }
+cranelift-bforest = { path = "../bforest", version = "0.66.0" }
 hashbrown = { version = "0.7", optional = true }
 target-lexicon = "0.10"
 log = { version = "0.4.6", default-features = false }
@@ -26,15 +26,15 @@ smallvec = { version = "1.0.0" }
 thiserror = "1.0.4"
 byteorder = { version = "1.3.2", default-features = false }
 peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.2.0" }
-regalloc = "0.0.26"
+regalloc = { version = "0.0.28" }
 # It is a goal of the cranelift-codegen crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
 # accomodated in `tests`.
 
 [build-dependencies]
-cranelift-codegen-meta = { path = "meta", version = "0.65.0" }
-peepmatic = { path = "../peepmatic", optional = true, version = "0.65.0" }
+cranelift-codegen-meta = { path = "meta", version = "0.66.0" }
+peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" }
 
 [features]
 default = ["std", "unwind"]
@@ -66,7 +66,6 @@ x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel
 # Option to enable all architectures.
 all-arch = [
     "x86",
-    "x64",
     "arm32",
     "arm64",
     "riscv"
@@ -26,7 +26,15 @@ fn main() {
     let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set");
     let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set");
 
-    // Configure isa targets cfg.
+    let new_backend_isas = if env::var("CARGO_FEATURE_X64").is_ok() {
+        // The x64 (new backend for x86_64) is a bit particular: it only requires generating
+        // the shared meta code; the only ISA-specific code is for settings.
+        vec![meta::isa::Isa::X86]
+    } else {
+        Vec::new()
+    };
+
+    // Configure isa targets using the old backend.
     let isa_targets = meta::isa::Isa::all()
         .iter()
         .cloned()
@@ -36,7 +44,7 @@ fn main() {
         })
         .collect::<Vec<_>>();
 
-    let isas = if isa_targets.is_empty() {
+    let old_backend_isas = if new_backend_isas.is_empty() && isa_targets.is_empty() {
         // Try to match native target.
         let target_name = target_triple.split('-').next().unwrap();
         let isa = meta::isa_from_arch(&target_name).expect("error when identifying target");
@@ -56,14 +64,23 @@ fn main() {
         crate_dir.join("build.rs").to_str().unwrap()
     );
 
-    if let Err(err) = meta::generate(&isas, &out_dir) {
+    if let Err(err) = meta::generate(&old_backend_isas, &new_backend_isas, &out_dir) {
         eprintln!("Error: {}", err);
         process::exit(1);
     }
 
     if env::var("CRANELIFT_VERBOSE").is_ok() {
-        for isa in &isas {
-            println!("cargo:warning=Includes support for {} ISA", isa.to_string());
+        for isa in &old_backend_isas {
+            println!(
+                "cargo:warning=Includes old-backend support for {} ISA",
+                isa.to_string()
+            );
+        }
+        for isa in &new_backend_isas {
+            println!(
+                "cargo:warning=Includes new-backend support for {} ISA",
+                isa.to_string()
+            );
         }
         println!(
             "cargo:warning=Build step took {:?}.",
@@ -61,3 +61,10 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
         _ => None,
     }
 }
+
+/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the
+/// embedding, and so requires reftyped values to be enumerated with a stackmap)?
+pub fn is_safepoint(func: &Function, inst: Inst) -> bool {
+    let op = func.dfg[inst].opcode();
+    op.is_resumable_trap() || op.is_call()
+}
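A hedged sketch of how an embedder might use the new is_safepoint helper to collect the instructions that need stackmaps. The helper is taken as a closure here because its module path and visibility are not shown in this diff; only the Function/Layout iteration is standard cranelift-codegen API.

use cranelift_codegen::ir::{Function, Inst};

/// Collect all safepoint instructions, in layout order.
fn collect_safepoints(
    func: &Function,
    is_safepoint: impl Fn(&Function, Inst) -> bool,
) -> Vec<Inst> {
    let mut out = Vec::new();
    for block in func.layout.blocks() {
        for inst in func.layout.block_insts(block) {
            if is_safepoint(func, inst) {
                // Reftyped values live across this point must appear in a stackmap.
                out.push(inst);
            }
        }
    }
    out
}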
@@ -166,7 +166,7 @@ pub type ConstantOffset = u32;
 /// function body); because the function is not yet compiled when constants are inserted,
 /// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset
 /// from the beginning of the function is known (see
-/// [`relaxation.rs`](crate::binemit::relaxation)).
+/// `relaxation` in `relaxation.rs`).
 #[derive(Clone)]
 pub struct ConstantPoolEntry {
     data: ConstantData,
@@ -11,6 +11,8 @@ use crate::machinst::RelocDistance;
 use alloc::vec::Vec;
 use core::fmt;
 use core::str::FromStr;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 
 /// Function signature.
 ///
@@ -20,6 +22,7 @@ use core::str::FromStr;
 /// A signature can optionally include ISA-specific ABI information which specifies exactly how
 /// arguments and return values are passed.
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Signature {
     /// The arguments passed to the function.
     pub params: Vec<AbiParam>,
@@ -145,6 +148,7 @@ impl fmt::Display for Signature {
 /// This describes the value type being passed to or from a function along with flags that affect
 /// how the argument is passed.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct AbiParam {
     /// Type of the argument value.
     pub value_type: Type,
@@ -255,6 +259,7 @@ impl fmt::Display for AbiParam {
 /// On some architectures, small integer function arguments are extended to the width of a
 /// general-purpose register.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentExtension {
     /// No extension, high bits are indeterminate.
     None,
@@ -272,6 +277,7 @@ pub enum ArgumentExtension {
 ///
 /// The argument purpose is used to indicate any special meaning of an argument or return value.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentPurpose {
     /// A normal user program value passed to or from a function.
     Normal,
@@ -8,6 +8,8 @@ use alloc::vec::Vec;
 use core::fmt::{self, Display, Formatter};
 use core::str::FromStr;
 use core::{i32, u32};
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 
 /// Convert a type into a vector of bytes; all implementors in this file must use little-endian
 /// orderings of bytes to match WebAssembly's little-endianness.
@@ -325,6 +327,7 @@ impl FromStr for Uimm32 {
 ///
 /// This is used as an immediate value in SIMD instructions.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct V128Imm(pub [u8; 16]);
 
 impl V128Imm {
@@ -584,6 +584,9 @@ enum OperandConstraint {
 
     /// This operand is `ctrlType.split_lanes()`.
     SplitLanes,
+
+    /// This operand is `ctrlType.merge_lanes()`.
+    MergeLanes,
 }
 
 impl OperandConstraint {
@@ -615,6 +618,11 @@ impl OperandConstraint {
                     .split_lanes()
                     .expect("invalid type for split_lanes"),
             ),
+            MergeLanes => Bound(
+                ctrl_type
+                    .merge_lanes()
+                    .expect("invalid type for merge_lanes"),
+            ),
         }
     }
 }
@@ -3,6 +3,8 @@
 use core::default::Default;
 use core::fmt::{self, Debug, Display, Formatter};
 use cranelift_codegen_shared::constants;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 use target_lexicon::{PointerWidth, Triple};
 
 /// The type of an SSA value.
@@ -21,6 +23,7 @@ use target_lexicon::{PointerWidth, Triple};
 /// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
 ///
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Type(u8);
 
 /// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector.
@@ -281,7 +284,7 @@ impl Type {
 
     /// Split the lane width in half and double the number of lanes to maintain the same bit-width.
     ///
-    /// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2.
+    /// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`.
     pub fn split_lanes(self) -> Option<Self> {
         match self.half_width() {
             Some(half_width) => half_width.by(2),
@@ -289,6 +292,17 @@ impl Type {
         }
     }
 
+    /// Merge lanes to half the number of lanes and double the lane width to maintain the same
+    /// bit-width.
+    ///
+    /// If this is a scalar type, it will return `None`.
+    pub fn merge_lanes(self) -> Option<Self> {
+        match self.double_width() {
+            Some(double_width) => double_width.half_vector(),
+            None => None,
+        }
+    }
+
     /// Index of this type, for use with hash tables etc.
     pub fn index(self) -> usize {
         usize::from(self.0)
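Concretely, the new Type::merge_lanes is the type-level inverse of split_lanes. A small usage sketch, assuming the cranelift-codegen crate at this revision (the constants come from ir::types):

use cranelift_codegen::ir::types::{I16X8, I32X4, I64};

fn main() {
    // Merging doubles the lane width and halves the lane count, keeping the
    // total bit-width at 128: i16x8 -> i32x4.
    assert_eq!(I16X8.merge_lanes(), Some(I32X4));
    // A scalar type has no lanes to merge.
    assert_eq!(I64.merge_lanes(), None);
    println!("ok");
}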
@@ -98,6 +98,7 @@ impl<'a> fmt::Display for DisplayValueLoc<'a> {
 /// - For register arguments, there is usually no difference, but if we ever add support for a
 /// register-window ISA like SPARC, register arguments would also need to be translated.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentLoc {
     /// This argument has not been assigned to a location yet.
     Unassigned,
@@ -23,7 +23,7 @@
//! being adjusted to set up a call, we implement a "nominal SP" tracking
//! feature by which a fixup (distance between actual SP and a "nominal" SP) is
//! known at each instruction. See the documentation for
//! [MemArg::NominalSPOffset] for more on this.
//! `MemArg::NominalSPOffset` for more on this.
//!
//! The stack looks like:
//!
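Concretely, a nominal-SP reference is resolved by adding the tracked fixup to the nominal offset. A minimal sketch of that conversion (names assumed for illustration only):

    // `virtual_sp_offset` is the distance that the real SP currently sits
    // below nominal SP (e.g. after pushing outgoing call arguments), so the
    // real-SP-relative offset is the nominal offset plus the fixup.
    fn real_sp_offset(nominal_off: i64, virtual_sp_offset: i64) -> i64 {
        nominal_off + virtual_sp_offset
    }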
@@ -90,12 +90,13 @@
//! - Return v1 in memory at `[P+8]`.
//! - Return v0 in memory at `[P+16]`.

use crate::binemit::Stackmap;
use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, StackSlot};
use crate::isa;
use crate::isa::aarch64::{inst::*, lower::ty_bits};
use crate::isa::aarch64::{inst::EmitState, inst::*, lower::ty_bits};
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
@@ -372,7 +373,10 @@ pub struct AArch64ABIBody {
    clobbered: Set<Writable<RealReg>>,
    /// Total number of spillslots, from regalloc.
    spillslots: Option<usize>,
    /// Total frame size.
    /// "Total frame size", defined as the distance between FP and nominal SP.
    /// Some items are pushed below nominal SP, so the function may actually use
    /// more stack than this would otherwise imply. It is simply the initial
    /// frame/allocation size needed for stackslots and spillslots.
    total_frame_size: Option<u32>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Writable<Reg>>,
@@ -400,6 +404,8 @@ fn in_int_reg(ty: ir::Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::R64 => true,
        types::R32 => panic!("Unexpected 32-bit reference on a 64-bit platform!"),
        _ => false,
    }
}
@@ -631,14 +637,11 @@ impl AArch64ABIBody {
            rn: stack_reg(),
            rm: stack_limit,
        });
        insts.push(Inst::OneWayCondBr {
            target: BranchTarget::ResolvedOffset(8),
            // Here `Hs` == "higher or same" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Hs),
        });
        insts.push(Inst::Udf {
        insts.push(Inst::TrapIf {
            trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
    }
}
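The effect of the change, sketched as emitted instruction sequences (assembly shown for illustration only):

    ; before: conditional branch over an unconditional trap
    ;   cmp  sp, <stack_limit>
    ;   b.hs +8            ; skip the trap when SP >= limit (unsigned)
    ;   udf                ; stack overflow
    ; after: one conditional-trap pseudo-instruction
    ;   cmp  sp, <stack_limit>
    ;   trap-if lo         ; udf when SP < limit (unsigned)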
@@ -656,12 +659,12 @@ fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
            mem,
            srcloc: None,
        },
        types::B32 | types::I32 => Inst::ULoad32 {
        types::B32 | types::I32 | types::R32 => Inst::ULoad32 {
            rd: into_reg,
            mem,
            srcloc: None,
        },
        types::B64 | types::I64 => Inst::ULoad64 {
        types::B64 | types::I64 | types::R64 => Inst::ULoad64 {
            rd: into_reg,
            mem,
            srcloc: None,

@@ -692,12 +695,12 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
            mem,
            srcloc: None,
        },
        types::B32 | types::I32 => Inst::Store32 {
        types::B32 | types::I32 | types::R32 => Inst::Store32 {
            rd: from_reg,
            mem,
            srcloc: None,
        },
        types::B64 | types::I64 => Inst::Store64 {
        types::B64 | types::I64 | types::R64 => Inst::Store64 {
            rd: from_reg,
            mem,
            srcloc: None,
@@ -813,6 +816,35 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
    caller_saved
}

fn gen_sp_adjust_insts<F: FnMut(Inst)>(adj: u64, is_sub: bool, mut f: F) {
    let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };

    if let Some(imm12) = Imm12::maybe_from_u64(adj) {
        let adj_inst = Inst::AluRRImm12 {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            imm12,
        };
        f(adj_inst);
    } else {
        let tmp = writable_spilltmp_reg();
        let const_inst = Inst::LoadConst64 {
            rd: tmp,
            const_data: adj,
        };
        let adj_inst = Inst::AluRRRExtend {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            rm: tmp.to_reg(),
            extendop: ExtendOp::UXTX,
        };
        f(const_inst);
        f(adj_inst);
    }
}
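The callback parameter lets the same helper serve both the prologue (pushing into a Vec) and call lowering (emitting through the context). A usage sketch matching the prologue path below:

    // Emit "sub sp, sp, #32"; adjustments too large for an AArch64 12-bit
    // immediate go through the load-constant fallback automatically.
    let mut insts = vec![];
    gen_sp_adjust_insts(32, /* is_sub = */ true, |inst| insts.push(inst));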

impl ABIBody for AArch64ABIBody {
    type I = Inst;

@@ -1027,6 +1059,29 @@ impl ABIBody for AArch64ABIBody {
        store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty)
    }

    fn spillslots_to_stackmap(&self, slots: &[SpillSlot], state: &EmitState) -> Stackmap {
        assert!(state.virtual_sp_offset >= 0);
        trace!(
            "spillslots_to_stackmap: slots = {:?}, state = {:?}",
            slots,
            state
        );
        let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32;
        let map_words = (map_size + 7) / 8;
        let mut bits = std::iter::repeat(false)
            .take(map_words as usize)
            .collect::<Vec<bool>>();

        let first_spillslot_word =
            ((self.stackslots_size + state.virtual_sp_offset as u32) / 8) as usize;
        for &slot in slots {
            let slot = slot.get() as usize;
            bits[first_spillslot_word + slot] = true;
        }

        Stackmap::from_slice(&bits[..])
    }
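A worked example of the bitmap layout (illustrative numbers, assuming one 8-byte word per spillslot): with stackslots_size = 16, virtual_sp_offset = 0, and nominal_sp_to_fp = 48, the map covers 48 / 8 = 6 words; spillslots start at word 16 / 8 = 2, so marking spillslot 1 sets bit index 3, yielding [false, false, false, true, false, false].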

    fn gen_prologue(&mut self) -> Vec<Inst> {
        let mut insts = vec![];
        if !self.call_conv.extends_baldrdash() {
@@ -1062,6 +1117,9 @@ impl ABIBody for AArch64ABIBody {
        }
        let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.

        let mut total_sp_adjust = 0;
        let mut nominal_sp_to_real_sp = 0;

        if !self.call_conv.extends_baldrdash() {
            // Leaf functions with a zero-size stack don't need a stack check
            // if one is specified; otherwise, always insert the stack check.
@@ -1072,42 +1130,29 @@
            }
        }
        if total_stacksize > 0 {
            // sub sp, sp, #total_stacksize
            if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
                let sub_inst = Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    imm12,
                };
                insts.push(sub_inst);
            } else {
                let tmp = writable_spilltmp_reg();
                let const_inst = Inst::LoadConst64 {
                    rd: tmp,
                    const_data: total_stacksize as u64,
                };
                let sub_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                insts.push(const_inst);
                insts.push(sub_inst);
            }
            total_sp_adjust += total_stacksize as u64;
        }
    }

    // N.B.: "nominal SP", which we use to refer to stackslots
    // and spillslots, is *here* (the value of SP at this program point).
    // N.B.: "nominal SP", which we use to refer to stackslots and
    // spillslots, is defined to be equal to the stack pointer at this point
    // in the prologue.
    //
    // If we push any clobbers below, we emit a virtual-SP adjustment
    // meta-instruction so that the nominal-SP references behave as if SP
    // were still at this point. See documentation for
    // [crate::isa::aarch64::abi](this module) for more details on
    // stackframe layout and nominal-SP maintenance.

    if total_sp_adjust > 0 {
        // sub sp, sp, #total_stacksize
        gen_sp_adjust_insts(
            total_sp_adjust,
            /* is_sub = */ true,
            |inst| insts.push(inst),
        );
    }

    // Save clobbered registers.
    let (clobbered_int, clobbered_vec) =
        get_callee_saves(self.call_conv, self.clobbered.to_vec());
@@ -1151,10 +1196,11 @@ impl ABIBody for AArch64ABIBody {
            srcloc: None,
        });
    }
    nominal_sp_to_real_sp += clobber_size as i64;

    if clobber_size > 0 {
        insts.push(Inst::VirtualSPOffsetAdj {
            offset: clobber_size as i64,
            offset: nominal_sp_to_real_sp,
        });
    }

@@ -1248,6 +1294,10 @@ impl ABIBody for AArch64ABIBody {
            .expect("frame size not computed before prologue generation")
    }

    fn stack_args_size(&self) -> u32 {
        self.sig.stack_arg_space as u32
    }

    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
        // We allocate in terms of 8-byte slots.
        match (rc, ty) {
@@ -1258,15 +1308,42 @@ impl ABIBody for AArch64ABIBody {
        }
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
        let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty);
        self.store_spillslot(to_slot, ty, from_reg.to_reg())
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Inst {
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        ty: Option<Type>,
    ) -> Inst {
        let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty);
        self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
    }
}

/// Return a type either from an optional type hint, or if not, from the default
/// type associated with the given register's class. This is used to generate
/// loads/spills appropriately given the type of value loaded/stored (which may
/// be narrower than the spillslot). We usually have the type because the
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
/// vreg's type. However, the regalloc *can* request a spill/reload without an
/// associated vreg when needed to satisfy a safepoint (which requires all
/// ref-typed values, even those in real registers in the original vcode, to be
/// in spillslots).
fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
    match (ty, r.get_class()) {
        // If the type is provided
        (Some(t), _) => t,
        // If no type is provided, this should be a register spill for a
        // safepoint, so we only expect I64 (integer) registers.
        (None, RegClass::I64) => I64,
        _ => panic!("Unexpected register class!"),
    }
}

enum CallDest {
    ExtName(ir::ExternalName, RelocDistance),
    Reg(Reg),
@@ -1345,7 +1422,7 @@ impl AArch64ABICall {
    }
}

fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
fn adjust_stack_and_nominal_sp<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
    if amount == 0 {
        return;
    }

@@ -1359,27 +1436,9 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
        offset: sp_adjustment,
    });

    let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
    if let Some(imm12) = Imm12::maybe_from_u64(amount) {
        ctx.emit(Inst::AluRRImm12 {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            imm12,
        })
    } else {
        ctx.emit(Inst::LoadConst64 {
            rd: writable_spilltmp_reg(),
            const_data: amount,
        });
        ctx.emit(Inst::AluRRRExtend {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            rm: spilltmp_reg(),
            extendop: ExtendOp::UXTX,
        });
    }
    gen_sp_adjust_insts(amount, is_sub, |inst| {
        ctx.emit(inst);
    });
}

impl ABICall for AArch64ABICall {
@@ -1395,12 +1454,12 @@ impl ABICall for AArch64ABICall {

    fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
        let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
        adjust_stack(ctx, off as u64, /* is_sub = */ true)
        adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ true)
    }

    fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
        let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
        adjust_stack(ctx, off as u64, /* is_sub = */ false)
        adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ false)
    }

    fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
@@ -1455,7 +1514,7 @@ impl ABICall for AArch64ABICall {
            self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
        }
        match &self.dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
            &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint(Inst::Call {
                info: Box::new(CallInfo {
                    dest: name.clone(),
                    uses,

@@ -1471,7 +1530,7 @@ impl ABICall for AArch64ABICall {
                    offset: 0,
                    srcloc: self.loc,
                });
                ctx.emit(Inst::CallInd {
                ctx.emit_safepoint(Inst::CallInd {
                    info: Box::new(CallIndInfo {
                        rn: spilltmp_reg(),
                        uses,

@@ -1481,7 +1540,7 @@ impl ABICall for AArch64ABICall {
                    }),
                });
            }
            &CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
            &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::CallInd {
                info: Box::new(CallIndInfo {
                    rn: reg,
                    uses,
@@ -3,6 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]

use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
@@ -403,8 +404,8 @@ impl ShowWithRRU for MemArg {
        &MemArg::RegScaledExtended(r1, r2, ty, op) => {
            let shift = shift_for_type(ty);
            let size = match op {
                ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
                _ => InstSize::Size64,
                ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
                _ => OperandSize::Size64,
            };
            let op = op.show_rru(mb_rru);
            format!(

@@ -417,8 +418,8 @@ impl ShowWithRRU for MemArg {
        }
        &MemArg::RegExtended(r1, r2, op) => {
            let size = match op {
                ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
                _ => InstSize::Size64,
                ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
                _ => OperandSize::Size64,
            };
            let op = op.show_rru(mb_rru);
            format!(
@@ -492,67 +493,150 @@ impl ShowWithRRU for BranchTarget {
}

/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
/// variants of vector instructions.
/// TODO: Create a separate type for SIMD & floating-point operands.
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
pub enum OperandSize {
    Size32,
    Size64,
    Size128,
}

impl InstSize {
impl OperandSize {
    /// 32-bit case?
    pub fn is32(self) -> bool {
        self == InstSize::Size32
        self == OperandSize::Size32
    }
    /// 64-bit case?
    pub fn is64(self) -> bool {
        self == InstSize::Size64
        self == OperandSize::Size64
    }
    /// Convert from an `is32` boolean flag to an `InstSize`.
    pub fn from_is32(is32: bool) -> InstSize {
    /// Convert from an `is32` boolean flag to an `OperandSize`.
    pub fn from_is32(is32: bool) -> OperandSize {
        if is32 {
            InstSize::Size32
            OperandSize::Size32
        } else {
            InstSize::Size64
            OperandSize::Size64
        }
    }
    /// Convert from a needed width to the smallest size that fits.
    pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
    pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize {
        let bits: usize = bits.into();
        assert!(bits <= 128);
        assert!(bits <= 64);
        if bits <= 32 {
            InstSize::Size32
        } else if bits <= 64 {
            InstSize::Size64
            OperandSize::Size32
        } else {
            InstSize::Size128
            OperandSize::Size64
        }
    }

    /// Convert from an integer type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> InstSize {
    pub fn from_ty(ty: Type) -> OperandSize {
        Self::from_bits(ty_bits(ty))
    }

    /// Convert to I32, I64, or I128.
    pub fn to_ty(self) -> Type {
        match self {
            InstSize::Size32 => I32,
            InstSize::Size64 => I64,
            InstSize::Size128 => I128,
            OperandSize::Size32 => I32,
            OperandSize::Size64 => I64,
        }
    }

    pub fn sf_bit(&self) -> u32 {
        match self {
            InstSize::Size32 => 0,
            InstSize::Size64 => 1,
            _ => {
                panic!("Unexpected size");
            }
            OperandSize::Size32 => 0,
            OperandSize::Size64 => 1,
        }
    }
}

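A usage sketch for the renamed type (behavior read off the implementation above; not part of the patch):

    assert_eq!(OperandSize::from_ty(I32), OperandSize::Size32);
    // Anything wider than 32 bits, up to the new 64-bit cap, selects Size64.
    assert_eq!(OperandSize::from_bits(48usize), OperandSize::Size64);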
/// Type used to communicate the size of a scalar SIMD & FP operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ScalarSize {
    Size8,
    Size16,
    Size32,
    Size64,
    Size128,
}

impl ScalarSize {
    /// Convert from a needed width to the smallest size that fits.
    pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
        match bits.into().next_power_of_two() {
            8 => ScalarSize::Size8,
            16 => ScalarSize::Size16,
            32 => ScalarSize::Size32,
            64 => ScalarSize::Size64,
            128 => ScalarSize::Size128,
            _ => panic!("Unexpected type width"),
        }
    }

    /// Convert from a type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> ScalarSize {
        Self::from_bits(ty_bits(ty))
    }

    /// Return the encoding bits that are used by some scalar FP instructions
    /// for a particular operand size.
    pub fn ftype(&self) -> u32 {
        match self {
            ScalarSize::Size16 => 0b11,
            ScalarSize::Size32 => 0b00,
            ScalarSize::Size64 => 0b01,
            _ => panic!("Unexpected scalar FP operand size"),
        }
    }
}

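Note that `ScalarSize::from_bits` rounds up to the next power of two, so, as a sketch of the mapping (again illustrative only):

    assert_eq!(ScalarSize::from_bits(12usize), ScalarSize::Size16);
    assert_eq!(ScalarSize::from_bits(64usize), ScalarSize::Size64);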
/// Type used to communicate the size of a vector operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VectorSize {
    Size8x8,
    Size8x16,
    Size16x4,
    Size16x8,
    Size32x2,
    Size32x4,
    Size64x2,
}

impl VectorSize {
    /// Convert from a type into a vector operand size.
    pub fn from_ty(ty: Type) -> VectorSize {
        match ty {
            F32X2 => VectorSize::Size32x2,
            F32X4 => VectorSize::Size32x4,
            F64X2 => VectorSize::Size64x2,
            I8X8 => VectorSize::Size8x8,
            I8X16 => VectorSize::Size8x16,
            I16X4 => VectorSize::Size16x4,
            I16X8 => VectorSize::Size16x8,
            I32X2 => VectorSize::Size32x2,
            I32X4 => VectorSize::Size32x4,
            I64X2 => VectorSize::Size64x2,
            _ => unimplemented!(),
        }
    }

    /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
    pub fn operand_size(&self) -> OperandSize {
        match self {
            VectorSize::Size64x2 => OperandSize::Size64,
            _ => OperandSize::Size32,
        }
    }

    /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
    pub fn lane_size(&self) -> ScalarSize {
        match self {
            VectorSize::Size8x8 => ScalarSize::Size8,
            VectorSize::Size8x16 => ScalarSize::Size8,
            VectorSize::Size16x4 => ScalarSize::Size16,
            VectorSize::Size16x8 => ScalarSize::Size16,
            VectorSize::Size32x2 => ScalarSize::Size32,
            VectorSize::Size32x4 => ScalarSize::Size32,
            VectorSize::Size64x2 => ScalarSize::Size64,
        }
    }
}

@@ -1,6 +1,6 @@
//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc};
use crate::binemit::{CodeOffset, Reloc, Stackmap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::TrapCode;
@@ -282,14 +282,13 @@ fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: InstSize) -> u32 {
    let ty_bit = if size.is32() { 0 } else { 1 };
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
        | (ty_bit << 22)
}

fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
@@ -298,7 +297,7 @@ fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
@@ -334,13 +333,11 @@ fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: InstSize, rn: Reg, rm: Reg) -> u32 {
    let bits = if size.is32() {
        0b000_11110_00_1_00000_00_1000_00000_00000
    } else {
        0b000_11110_01_1_00000_00_1000_00000_00000
    };
    bits | (machreg_to_vec(rm) << 16) | (machreg_to_vec(rn) << 5)
fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
@@ -355,10 +352,11 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
fn enc_vec_rr_misc(size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
    bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
    bits | size << 22 | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
@@ -378,7 +376,37 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn:
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    virtual_sp_offset: i64,
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stackmap for upcoming instruction, as provided to `pre_safepoint()`.
    stackmap: Option<Stackmap>,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABIBody<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stackmap: None,
        }
    }

    fn pre_safepoint(&mut self, stackmap: Stackmap) {
        self.stackmap = Some(stackmap);
    }
}

impl EmitState {
    fn take_stackmap(&mut self) -> Option<Stackmap> {
        self.stackmap.take()
    }

    fn clear_post_insn(&mut self) {
        self.stackmap = None;
    }
}
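// Lifecycle sketch: the ABI layer calls `pre_safepoint(stackmap)` just
// before a safepoint instruction is emitted; the emitter consumes the map
// with `take_stackmap()` when it emits that instruction (a call or trap),
// and `clear_post_insn()` ensures a stale map never leaks onto the next
// instruction.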

impl MachInstEmit for Inst {
@@ -533,8 +561,16 @@ impl MachInstEmit for Inst {
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
                    ALUOp::Lsl32 => (0b0101001100, u32::from(32 - amt), u32::from(31 - amt)),
                    ALUOp::Lsl64 => (0b1101001101, u32::from(64 - amt), u32::from(63 - amt)),
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
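// Note on the new `% 32` / `% 64` wrap (an inference from the AArch64
// encoding, not stated in the patch): `lsl rd, rn, #amt` is an alias of
// `ubfm` with immr = (size - amt) % size, so the wrap keeps a zero shift
// amount encodable instead of producing an out-of-range field value.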
@@ -604,7 +640,7 @@ impl MachInstEmit for Inst {
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.inst_size().is32() { 0b0 } else { 0b1 };
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
@@ -970,10 +1006,10 @@ impl MachInstEmit for Inst {
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, ty } => {
                let (imm5, shift, mask) = match ty {
                    F32 => (0b00100, 3, 0b011),
                    F64 => (0b01000, 4, 0b001),
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
@@ -1012,6 +1048,10 @@ impl MachInstEmit for Inst {
                    FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
                    FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
                    FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
                    FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
                    FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
                    FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
                    FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
                };
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
@@ -1066,20 +1106,25 @@ impl MachInstEmit for Inst {
                };
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, ty } => {
                let bits_12_16 = match op {
                    VecMisc2::Not => {
                        debug_assert_eq!(128, ty_bits(ty));
                        0b00101
                    }
            &Inst::VecMisc { op, rd, rn, size } => {
                let enc_size = match size {
                    VectorSize::Size8x16 => 0b00,
                    VectorSize::Size16x8 => 0b01,
                    VectorSize::Size32x4 => 0b10,
                    VectorSize::Size64x2 => 0b11,
                    _ => unimplemented!(),
                };
                sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
                let (bits_12_16, size) = match op {
                    VecMisc2::Not => (0b00101, 0b00),
                    VecMisc2::Neg => (0b01011, enc_size),
                };
                sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, ty } => {
                let (q, size) = match ty {
                    I8X16 => (0b1, 0b00),
                    I16X8 => (0b1, 0b01),
                    I32X4 => (0b1, 0b10),
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
@@ -1088,10 +1133,10 @@ impl MachInstEmit for Inst {
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::FpuCmp32 { rn, rm } => {
                sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
            }
            &Inst::FpuCmp64 { rn, rm } => {
                sink.put4(enc_fcmp(InstSize::Size64, rn, rm));
                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
@@ -1178,10 +1223,10 @@ impl MachInstEmit for Inst {
                }
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size64));
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
@@ -1203,12 +1248,12 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, ty } => {
                let (q, imm5, shift, mask) = match ty {
                    I8 => (0b0, 0b00001, 1, 0b1111),
                    I16 => (0b0, 0b00010, 2, 0b0111),
                    I32 => (0b0, 0b00100, 3, 0b0011),
                    I64 => (0b1, 0b01000, 4, 0b0001),
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
                    VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
                    VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
                    VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & mask, idx);
@@ -1221,12 +1266,12 @@ impl MachInstEmit for Inst {
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, ty } => {
                let imm5 = match ty {
                    I8 => 0b00001,
                    I16 => 0b00010,
                    I32 => 0b00100,
                    I64 => 0b01000,
            &Inst::VecDup { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size8x16 => 0b00001,
                    VectorSize::Size16x8 => 0b00010,
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
@@ -1236,10 +1281,10 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, ty } => {
                let imm5 = match ty {
                    F32 => 0b00100,
                    F64 => 0b01000,
            &Inst::VecDupFromFpu { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
@@ -1271,37 +1316,26 @@ impl MachInstEmit for Inst {
                rn,
                rm,
                alu_op,
                ty,
                size,
            } => {
                let enc_size = match ty {
                    I8X16 => 0b00,
                    I16X8 => 0b01,
                    I32X4 => 0b10,
                let enc_size = match size {
                    VectorSize::Size8x16 => 0b00,
                    VectorSize::Size16x8 => 0b01,
                    VectorSize::Size32x4 => 0b10,
                    VectorSize::Size64x2 => 0b11,
                    _ => 0,
                };
                let enc_size_for_fcmp = match ty {
                    F32X4 => 0b0,
                    F64X2 => 0b1,
                let enc_size_for_fcmp = match size {
                    VectorSize::Size32x4 => 0b0,
                    VectorSize::Size64x2 => 0b1,
                    _ => 0,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::SQAddScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b000011)
                    }
                    VecALUOp::SQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b001011)
                    }
                    VecALUOp::UQAddScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b000011)
                    }
                    VecALUOp::UQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b001011)
                    }
                    VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),

@@ -1312,27 +1346,20 @@ impl MachInstEmit for Inst {
                    VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_00_1, 0b000111)
                    }
                    VecALUOp::Bic => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_01_1, 0b000111)
                    }
                    VecALUOp::Orr => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_10_1, 0b000111)
                    }
                    VecALUOp::Eor => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b011_01110_00_1, 0b000111)
                    }
                    VecALUOp::Bsl => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b011_01110_01_1, 0b000111)
                    }
                    VecALUOp::And => (0b010_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
                    VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
                    VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
                    VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b010_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
                };
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
@@ -1437,6 +1464,9 @@ impl MachInstEmit for Inst {
                // Noop; this is just a placeholder for epilogues.
            }
            &Inst::Call { ref info } => {
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if info.opcode.is_call() {

@@ -1444,6 +1474,9 @@ impl MachInstEmit for Inst {
                }
            }
            &Inst::CallInd { ref info } => {
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                if info.opcode.is_call() {
                    sink.add_call_site(info.loc, info.opcode);
@@ -1471,12 +1504,20 @@ impl MachInstEmit for Inst {
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::OneWayCondBr { target, kind } => {
            &Inst::TrapIf { kind, trap_info } => {
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                if let Some(l) = target.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch19);
                }
                sink.put4(enc_conditional_br(target, kind));
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_info };
                trap.emit(sink, flags, state);
                // LABEL:
                sink.bind_label(label);
            }
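            // The TrapIf arm above replaces the old OneWayCondBr + Udf
            // pattern: it branches over the trap on the inverted condition,
            // i.e. (sketch)
            //     b.<inv-cond> LABEL   ; fall through only when trapping
            //     udf
            //   LABEL: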
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
@@ -1491,6 +1532,9 @@ impl MachInstEmit for Inst {
            &Inst::Udf { trap_info } => {
                let (srcloc, code) = trap_info;
                sink.add_trap(srcloc, code);
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
@@ -1515,6 +1559,17 @@ impl MachInstEmit for Inst {
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to default when condition code from prior comparison indicates.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save index in a tmp (the live range of ridx only goes to start of this
                // sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);

@@ -1553,6 +1608,10 @@ impl MachInstEmit for Inst {
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
@@ -1660,7 +1719,7 @@ impl MachInstEmit for Inst {
                debug!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
@@ -1679,5 +1738,11 @@ impl MachInstEmit for Inst {

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}
[Diff for one file not shown because of its large size.]
@@ -4,7 +4,7 @@
#[allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::InstSize;
use crate::isa::aarch64::inst::OperandSize;
use crate::machinst::*;

use regalloc::RealRegUniverse;

@@ -340,7 +340,7 @@ pub struct ImmLogic {
    /// `R` field: rotate amount.
    pub s: u8,
    /// Was this constructed for a 32-bit or 64-bit instruction?
    pub size: InstSize,
    pub size: OperandSize,
}

impl ImmLogic {

@@ -351,7 +351,7 @@ impl ImmLogic {
        if ty != I64 && ty != I32 {
            return None;
        }
        let inst_size = InstSize::from_ty(ty);
        let operand_size = OperandSize::from_ty(ty);

        let original_value = value;

@@ -532,7 +532,7 @@ impl ImmLogic {
            n: out_n != 0,
            r: r as u8,
            s: s as u8,
            size: inst_size,
            size: operand_size,
        })
    }

@@ -732,7 +732,7 @@ mod test {
            n: true,
            r: 0,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(1, I64)
    );

@@ -743,7 +743,7 @@ mod test {
            n: true,
            r: 63,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(2, I64)
    );

@@ -758,7 +758,7 @@ mod test {
            n: true,
            r: 61,
            s: 4,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(248, I64)
    );

@@ -771,7 +771,7 @@ mod test {
            n: true,
            r: 57,
            s: 3,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(1920, I64)
    );

@@ -782,7 +782,7 @@ mod test {
            n: true,
            r: 63,
            s: 13,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x7ffe, I64)
    );

@@ -793,7 +793,7 @@ mod test {
            n: true,
            r: 48,
            s: 1,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x30000, I64)
    );

@@ -804,7 +804,7 @@ mod test {
            n: true,
            r: 44,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x100000, I64)
    );

@@ -815,7 +815,7 @@ mod test {
            n: true,
            r: 63,
            s: 62,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
    );

@@ -826,7 +826,7 @@ mod test {
            n: false,
            r: 1,
            s: 60,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
    );

@@ -837,7 +837,7 @@ mod test {
            n: false,
            r: 1,
            s: 49,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x8181818181818181, I64)
    );

@@ -848,7 +848,7 @@ mod test {
            n: false,
            r: 10,
            s: 43,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
    );

@@ -859,7 +859,7 @@ mod test {
            n: false,
            r: 0,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x100000001, I64)
    );

@@ -870,7 +870,7 @@ mod test {
            n: false,
            r: 0,
            s: 56,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x1111111111111111, I64)
    );
[Diff for one file not shown because of its large size.]
@@ -1,7 +1,8 @@
//! AArch64 ISA definitions: registers.

use crate::ir::types::*;
use crate::isa::aarch64::inst::InstSize;
use crate::isa::aarch64::inst::OperandSize;
use crate::isa::aarch64::inst::ScalarSize;
use crate::isa::aarch64::inst::VectorSize;
use crate::machinst::*;
use crate::settings;

@@ -255,7 +256,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {

/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::I64 || !size.is32() {
        // We can't do any better.
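For example, a 64-bit integer register shown as `x3` prints as `w3` when `size.is32()` holds.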
@@ -276,23 +277,8 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Operand
    s
}

/// Show a vector register.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        return s;
    }
    let prefix = match size {
        InstSize::Size32 => "s",
        InstSize::Size64 => "d",
        InstSize::Size128 => "q",
    };
    s.replace_range(0..1, prefix);
    s
}

/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        // We can't do any better.

@@ -301,13 +287,13 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar

    if reg.is_real() {
        // Change (eg) "v0" into "d0".
        if reg.get_class() == RegClass::V128 && s.starts_with("v") {
            let replacement = match ty {
                I64 | F64 => "d",
                I8X16 => "b",
                I16X8 => "h",
                I32X4 => "s",
                _ => unimplemented!(),
        if s.starts_with("v") {
            let replacement = match size {
                ScalarSize::Size8 => "b",
                ScalarSize::Size16 => "h",
                ScalarSize::Size32 => "s",
                ScalarSize::Size64 => "d",
                ScalarSize::Size128 => "q",
            };
            s.replace_range(0..1, replacement);
        }

@@ -321,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar
}

/// Show a vector register.
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
    assert_eq!(RegClass::V128, reg.get_class());
    let mut s = reg.show_rru(mb_rru);

    match ty {
        F32X2 => s.push_str(".2s"),
        F32X4 => s.push_str(".4s"),
        F64X2 => s.push_str(".2d"),
        I8X8 => s.push_str(".8b"),
        I8X16 => s.push_str(".16b"),
        I16X4 => s.push_str(".4h"),
        I16X8 => s.push_str(".8h"),
        I32X2 => s.push_str(".2s"),
        I32X4 => s.push_str(".4s"),
        I64X2 => s.push_str(".2d"),
        _ => unimplemented!(),
    }
    let suffix = match size {
        VectorSize::Size8x8 => ".8b",
        VectorSize::Size8x16 => ".16b",
        VectorSize::Size16x4 => ".4h",
        VectorSize::Size16x8 => ".8h",
        VectorSize::Size32x2 => ".2s",
        VectorSize::Size32x4 => ".4s",
        VectorSize::Size64x2 => ".2d",
    };

    s.push_str(suffix);
    s
}

/// Show an indexed vector element.
pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
pub fn show_vreg_element(
    reg: Reg,
    mb_rru: Option<&RealRegUniverse>,
    idx: u8,
    size: VectorSize,
) -> String {
    assert_eq!(RegClass::V128, reg.get_class());
    let mut s = reg.show_rru(mb_rru);

    let suffix = match ty {
        I8 => "b",
        I16 => "h",
        I32 => "s",
        I64 => "d",
        F32 => "s",
        F64 => "d",
        _ => unimplemented!(),
    let suffix = match size {
        VectorSize::Size8x8 => "b",
        VectorSize::Size8x16 => "b",
        VectorSize::Size16x4 => "h",
        VectorSize::Size16x8 => "h",
        VectorSize::Size32x2 => "s",
        VectorSize::Size32x4 => "s",
        VectorSize::Size64x2 => "d",
    };

    s.push_str(&format!(".{}[{}]", suffix, idx));
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::{CodegenError, CodegenResult};
use crate::CodegenResult;

use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
    ty: Type,
    cond: Cond,
) -> CodegenResult<()> {
    match ty {
        F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
        _ => {
            return Err(CodegenError::Unsupported(format!(
                "unsupported SIMD type: {:?}",
                ty
            )));
        }
    };

    let is_float = match ty {
        F32X4 | F64X2 => true,
        _ => false,
    };
    let size = VectorSize::from_ty(ty);
    // 'Less than' operations are implemented by swapping
    // the order of operands and using the 'greater than'
    // instructions.
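    // e.g. "a < b" is emitted as "b > a" (cmgt/fcmgt with swapped operands).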
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
        rd,
        rn,
        rm,
        ty,
        size,
    });

    if cond == Cond::Ne {

@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
            op: VecMisc2::Not,
            rd,
            rn: rd.to_reg(),
            ty: I8X16,
            size,
        });
    }

@@ -829,8 +820,8 @@ pub fn ty_bits(ty: Type) -> usize {
        B1 => 1,
        B8 | I8 => 8,
        B16 | I16 => 16,
        B32 | I32 | F32 => 32,
        B64 | I64 | F64 => 64,
        B32 | I32 | F32 | R32 => 32,
        B64 | I64 | F64 | R64 => 64,
        B128 | I128 => 128,
        IFLAGS | FFLAGS => 32,
        B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64,

@@ -842,7 +833,7 @@ pub fn ty_bits(ty: Type) -> usize {

pub(crate) fn ty_is_int(ty: Type) -> bool {
    match ty {
        B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
        B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true,
        F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
        IFLAGS | FFLAGS => panic!("Unexpected flags type"),
        _ => panic!("ty_is_int() on unknown type: {:?}", ty),
@@ -988,16 +979,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [
        InsnInput {
            insn: insn,
            input: 0,
        },
        InsnInput {
            insn: insn,
            input: 1,
        },
    ];
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let ty = ctx.input_ty(insn, 0);
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
@@ -1010,16 +992,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [
        InsnInput {
            insn: insn,
            input: 0,
        },
        InsnInput {
            insn: insn,
            input: 1,
        },
    ];
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
@ -58,96 +58,117 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Iadd => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Add,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::Isub => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Sub,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::UaddSat | Opcode::SaddSat => {
|
||||
// We use the vector instruction set's saturating adds (UQADD /
|
||||
// SQADD), which require vector registers.
|
||||
let is_signed = op == Opcode::SaddSat;
|
||||
let narrow_mode = if is_signed {
|
||||
NarrowValueMode::SignExtend64
|
||||
} else {
|
||||
NarrowValueMode::ZeroExtend64
|
||||
};
|
||||
let alu_op = if is_signed {
|
||||
VecALUOp::SQAddScalar
|
||||
} else {
|
||||
VecALUOp::UQAddScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||
// We use the scalar SIMD & FP saturating additions and subtractions
|
||||
// (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
|
||||
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd: va,
|
||||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd,
|
||||
rn: va.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
});
|
||||
}
if ty_bits(ty) < 128 {
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
NarrowValueMode::ZeroExtend64
};
let fpu_op = match op {
Opcode::UaddSat => FPUOp2::Uqadd64,
Opcode::SaddSat => FPUOp2::Sqadd64,
Opcode::UsubSat => FPUOp2::Uqsub64,
Opcode::SsubSat => FPUOp2::Sqsub64,
_ => unreachable!(),
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::FpuRRR {
fpu_op,
rd: va,
rn: va.to_reg(),
rm: vb.to_reg(),
});
ctx.emit(Inst::MovFromVec {
rd,
rn: va.to_reg(),
idx: 0,
size: VectorSize::Size64x2,
});
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

Opcode::UsubSat | Opcode::SsubSat => {
let is_signed = op == Opcode::SsubSat;
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
NarrowValueMode::ZeroExtend64
};
let alu_op = if is_signed {
VecALUOp::SQSubScalar
} else {
VecALUOp::UQSubScalar
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::VecRRR {
rd: va,
rn: va.to_reg(),
rm: vb.to_reg(),
alu_op,
ty: I64,
});
ctx.emit(Inst::MovFromVec {
rd,
rn: va.to_reg(),
idx: 0,
ty: I64,
});
let alu_op = match op {
Opcode::UaddSat => VecALUOp::Uqadd,
Opcode::SaddSat => VecALUOp::Sqadd,
Opcode::UsubSat => VecALUOp::Uqsub,
Opcode::SsubSat => VecALUOp::Sqsub,
_ => unreachable!(),
};

ctx.emit(Inst::VecRRR {
rd,
rn,
rm,
alu_op,
size: VectorSize::from_ty(ty),
});
}
}
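The scalar SQADD/UQADD/SQSUB/UQSUB instructions this arm targets compute saturating arithmetic: on overflow the result clamps to the type's limits instead of wrapping. A minimal pure-Rust model of the 64-bit semantics (standard library only; the helper names are illustrative, not part of the patch):

// Illustrative model of the 64-bit saturating ops the lowering targets.
// Rust's standard library provides the same clamping semantics directly.
fn uqadd64(a: u64, b: u64) -> u64 {
    a.saturating_add(b) // clamps to u64::MAX instead of wrapping
}

fn sqadd64(a: i64, b: i64) -> i64 {
    a.saturating_add(b) // clamps to i64::MIN..=i64::MAX
}

fn uqsub64(a: u64, b: u64) -> u64 {
    a.saturating_sub(b) // clamps to 0
}

fn main() {
    assert_eq!(uqadd64(u64::MAX, 1), u64::MAX);
    assert_eq!(sqadd64(i64::MAX, 1), i64::MAX);
    assert_eq!(uqsub64(0, 1), 0);
}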

Opcode::Ineg => {
let rd = get_output_reg(ctx, outputs[0]);
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
if ty_bits(ty) < 128 {
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op: VecMisc2::Neg,
rd,
rn,
size: VectorSize::from_ty(ty),
});
}
}
|
||||
|
||||
Opcode::Imul => {
|
||||
|
@ -155,14 +176,24 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ra: zero_reg(),
|
||||
});
|
||||
if ty_bits(ty) < 128 {
|
||||
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ra: zero_reg(),
|
||||
});
|
||||
} else {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Mul,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
}
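The scalar multiply above is emitted as MAdd with the zero register as the addend, relying on the identity mul(rn, rm) = madd(rn, rm, 0). A sketch of that identity (illustrative, standard Rust only):

// madd computes rn * rm + ra with wrapping (modular) semantics, as on aarch64.
fn madd64(rn: u64, rm: u64, ra: u64) -> u64 {
    rn.wrapping_mul(rm).wrapping_add(ra)
}

fn main() {
    let (a, b) = (0xdead_beefu64, 7);
    // A plain multiply is just madd with a zero addend.
    assert_eq!(madd64(a, b, 0), a.wrapping_mul(b));
}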
|
||||
|
||||
Opcode::Umulhi | Opcode::Smulhi => {
|
||||
|
@ -282,14 +313,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// msub rd, rd, rm, rn ; rd = rn - rd * rm
|
||||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp::MSub64,
|
||||
|
@ -300,17 +328,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
} else {
|
||||
if div_op == ALUOp::SDiv64 {
|
||||
// cbz rm, #20
|
||||
// cbnz rm, #8
|
||||
// udf ; divide by zero
|
||||
// cmn rm, 1
|
||||
// ccmp rn, 1, #nzcv, eq
|
||||
// b.vc 12
|
||||
// b.vc #8
|
||||
// udf ; signed overflow
|
||||
// udf ; divide by zero
|
||||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 20;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
||||
|
@ -319,7 +347,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// The following checks must be done in 32-bit or 64-bit, depending
// on the input type, even though the initial div instruction is
// currently always done in 64-bit.
|
||||
let size = InstSize::from_ty(ty);
|
||||
let size = OperandSize::from_ty(ty);
|
||||
// Check RHS is -1.
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
|
||||
|
@ -336,27 +364,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
nzcv: NZCV::new(false, false, false, false),
|
||||
cond: Cond::Eq,
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(12),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
});
|
||||
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(Cond::Vs),
|
||||
});
|
||||
} else {
// cbnz rm, #8
// udf ; divide by zero

// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
ctx.emit(Inst::TrapIf {
trap_info,
kind: CondBrKind::Zero(rm),
});
}

let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
ctx.emit(Inst::Udf { trap_info });
}
}
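The branch offsets above exist only to skip the udf trap when a check passes; the new TrapIf form expresses the same control flow directly. The conditions being guarded are the two cases where a hardware sdiv cannot produce a correct wasm result. A pure-Rust model of that trap logic (illustrative; the trap names match the TrapCodes used above):

#[derive(Debug, PartialEq)]
enum Trap {
    IntegerDivisionByZero,
    IntegerOverflow,
}

// Model of the checks the lowering emits around a signed 64-bit divide.
fn checked_sdiv64(rn: i64, rm: i64) -> Result<i64, Trap> {
    if rm == 0 {
        return Err(Trap::IntegerDivisionByZero);
    }
    // i64::MIN / -1 overflows: the true quotient (2^63) is unrepresentable.
    if rn == i64::MIN && rm == -1 {
        return Err(Trap::IntegerOverflow);
    }
    Ok(rn / rm)
}

fn main() {
    assert_eq!(checked_sdiv64(10, -3), Ok(-3));
    assert_eq!(checked_sdiv64(1, 0), Err(Trap::IntegerDivisionByZero));
    assert_eq!(checked_sdiv64(i64::MIN, -1), Err(Trap::IntegerOverflow));
}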
|
||||
|
||||
|
@ -398,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -442,32 +465,64 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
let ty = ty.unwrap();
|
||||
let size = InstSize::from_bits(ty_bits(ty));
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
(Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
|
||||
(Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
|
||||
(Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
|
||||
(Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
|
||||
let alu_op = match op {
|
||||
Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
|
||||
Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
|
||||
Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let size = OperandSize::from_bits(ty_bits(ty));
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
(Opcode::Ushr, OperandSize::Size64) => NarrowValueMode::ZeroExtend64,
|
||||
(Opcode::Ushr, OperandSize::Size32) => NarrowValueMode::ZeroExtend32,
|
||||
(Opcode::Sshr, OperandSize::Size64) => NarrowValueMode::SignExtend64,
|
||||
(Opcode::Sshr, OperandSize::Size32) => NarrowValueMode::SignExtend32,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
|
||||
let alu_op = match op {
|
||||
Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
|
||||
Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
|
||||
Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let size = VectorSize::from_ty(ty);
let (alu_op, is_right_shift) = match op {
Opcode::Ishl => (VecALUOp::Sshl, false),
Opcode::Ushr => (VecALUOp::Ushl, true),
Opcode::Sshr => (VecALUOp::Sshl, true),
_ => unreachable!(),
};

let rm = if is_right_shift {
// Right shifts are implemented with a negative left shift.
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let rn = zero_reg();
ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
tmp.to_reg()
} else {
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
};

ctx.emit(Inst::VecDup { rd, rn: rm, size });

ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm: rd.to_reg(),
size,
});
}
}
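The aarch64 vector ushl/sshl instructions take a signed per-lane shift amount, so a right shift is expressed as a left shift by the negated amount, which is what the Sub32 above computes. A per-lane model for in-range shift amounts (illustrative only):

// Model of aarch64 USHL lane semantics: a signed shift amount shifts
// left when positive and (logically) right when negative.
fn ushl_lane(value: u64, shift: i8) -> u64 {
    if shift >= 0 {
        value.wrapping_shl(shift as u32)
    } else {
        value.wrapping_shr((-shift) as u32)
    }
}

fn main() {
    assert_eq!(ushl_lane(0x80, 1), 0x100); // left shift
    assert_eq!(ushl_lane(0x80, -4), 0x8);  // ushr x, #4 == ushl x, #-4
}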
|
||||
|
||||
Opcode::Rotr | Opcode::Rotl => {
|
||||
|
@ -1107,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1134,7 +1189,26 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::IsNull | Opcode::IsInvalid => {
panic!("Reference types not supported");
// Null references are represented by the constant value 0; invalid references are
// represented by the constant value -1. See `define_reftypes()` in
// `meta/src/isa/x86/encodings.rs` to confirm.
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ctx.input_ty(insn, 0);
let (alu_op, const_value) = match op {
Opcode::IsNull => {
// cmp rn, #0
(choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64), 0)
}
Opcode::IsInvalid => {
// cmn rn, #1
(choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64), 1)
}
_ => unreachable!(),
};
let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap());
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value));
ctx.emit(Inst::CSet { rd, cond: Cond::Eq });
}
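Given the representation described in the comment above (null is 0, invalid is all ones), both opcodes reduce to comparing the reference word against a small constant and materializing a boolean. A model of the two predicates (illustrative, standard Rust only):

// Reference values modeled as raw machine words: 0 is null, -1 (all ones)
// is the invalid sentinel.
fn is_null(r: u64) -> bool {
    r == 0 // what `cmp rn, #0; cset rd, eq` computes
}

fn is_invalid(r: u64) -> bool {
    // `cmn rn, #1` sets flags for rn + 1, so EQ fires exactly when rn == -1.
    r.wrapping_add(1) == 0
}

fn main() {
    assert!(is_null(0) && !is_invalid(0));
    assert!(is_invalid(u64::MAX) && !is_null(u64::MAX));
}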
|
||||
|
||||
Opcode::Copy => {
|
||||
|
@ -1145,6 +1219,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::Bint | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => {
// If this is a Bint from a Trueif/Trueff/IsNull/IsInvalid, then the result is already
// 64-bit-zero-extended, even if the CLIF type doesn't say so, because it was produced
// by a CSet. In this case, we do not need to do any zero-extension.
let input_info = ctx.get_input(insn, 0);
let src_op = input_info
.inst
.map(|(src_inst, _)| ctx.data(src_inst).opcode());
let narrow_mode = match (src_op, op) {
(Some(Opcode::Trueif), Opcode::Bint)
| (Some(Opcode::Trueff), Opcode::Bint)
| (Some(Opcode::IsNull), Opcode::Bint)
| (Some(Opcode::IsInvalid), Opcode::Bint) => NarrowValueMode::None,
_ => NarrowValueMode::ZeroExtend64,
};

// All of these ops are simply a move from a zero-extended source.
// Here is why this works, in each case:
//

@@ -1157,7 +1246,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// - Ireduce: changing width of an integer. Smaller ints are stored
// with undefined high-order bits, so we can simply do a copy.

let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
|
@ -1203,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1290,7 +1379,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::Trap | Opcode::ResumableTrap => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
ctx.emit(Inst::Udf { trap_info })
|
||||
ctx.emit_safepoint(Inst::Udf { trap_info });
|
||||
}
|
||||
|
||||
Opcode::Trapif | Opcode::Trapff => {
|
||||
|
@ -1324,19 +1413,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
cond
|
||||
};
|
||||
|
||||
// Branch around the break instruction with inverted cond. Go straight to lowered
|
||||
// one-target form; this is logically part of a single-in single-out template lowering.
|
||||
let cond = cond.invert();
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
|
||||
ctx.emit(Inst::Udf { trap_info })
|
||||
ctx.emit_safepoint(Inst::Udf { trap_info })
|
||||
}
|
||||
|
||||
Opcode::Safepoint => {
|
||||
panic!("safepoint support not implemented!");
|
||||
panic!("safepoint instructions not used by new backend's safepoints!");
|
||||
}
|
||||
|
||||
Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
|
||||
|
@ -1467,15 +1552,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
let idx = *imm;
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
let ty = ty.unwrap();
|
||||
|
||||
if ty_is_int(ty) {
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
|
||||
// Plain moves are faster on some processors.
|
||||
} else if idx == 0 {
|
||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
} else {
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
|
||||
}
|
||||
} else {
|
||||
unreachable!();
|
||||
|
@ -1485,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Splat => {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let inst = if ty_is_int(ty) {
|
||||
Inst::VecDup { rd, rn, ty }
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ty.unwrap());
|
||||
let inst = if ty_is_int(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, ty }
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
ctx.emit(inst);
|
||||
}
|
||||
|
@ -1507,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// cmp xm, #0
// cset xm, ne

let input_ty = ctx.input_ty(insn, 0);
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));

if op == Opcode::VanyTrue {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: tmp,
rn: rm,
rm: rm,
ty: input_ty,
size,
});
} else {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: tmp,
rn: rm,
ty: input_ty,
size,
});
};
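Umaxp (pairwise unsigned max) leaves a nonzero result iff any lane was nonzero, while uminv (unsigned min across lanes) is nonzero iff every lane was; the final compare and cset turn that into a boolean. A lane-level model (illustrative):

// Model of the two reductions on a 4-lane vector.
fn vany_true(lanes: &[u32; 4]) -> bool {
    // umaxp-style reduction: the max is nonzero iff some lane is nonzero.
    *lanes.iter().max().unwrap() != 0
}

fn vall_true(lanes: &[u32; 4]) -> bool {
    // uminv: the min is nonzero iff all lanes are nonzero.
    *lanes.iter().min().unwrap() != 0
}

fn main() {
    assert!(vany_true(&[0, 0, 3, 0]));
    assert!(!vall_true(&[0, 0, 3, 0]));
    assert!(vall_true(&[1, 2, 3, 4]));
}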
|
||||
|
||||
|
@ -1529,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
|
@ -1711,12 +1799,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
|
||||
}
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
|
||||
});
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
|
||||
|
@ -1752,12 +1839,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
|
||||
lower_constant_f32(ctx, tmp, high_bound);
|
||||
|
@ -1765,12 +1851,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
||||
});
|
||||
} else {
|
||||
// From float64.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
|
@ -1795,12 +1880,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
lower_constant_f64(ctx, tmp, high_bound);

@@ -1808,12 +1892,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
ctx.emit(Inst::Udf { trap_info });
ctx.emit(Inst::TrapIf {
trap_info,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
};
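The sequence above guards a float-to-int conversion: an ordered/unordered compare first traps NaN inputs (BadConversionToInteger), then the low and high bound compares trap out-of-range values (IntegerOverflow). A scalar model for f64 to i32 (the bounds here are illustrative; the real code selects them per input and output type):

#[derive(Debug, PartialEq)]
enum Trap {
    BadConversionToInteger,
    IntegerOverflow,
}

// Model of the range-checked fcvt: NaN traps, then the value must lie
// strictly inside (low_bound, high_bound) to be representable.
fn checked_f64_to_i32(x: f64) -> Result<i32, Trap> {
    if x.is_nan() {
        return Err(Trap::BadConversionToInteger);
    }
    // Exclusive bounds just outside i32's range.
    if !(x > -2147483649.0 && x < 2147483648.0) {
        return Err(Trap::IntegerOverflow);
    }
    Ok(x as i32)
}

fn main() {
    assert_eq!(checked_f64_to_i32(-1.9), Ok(-1)); // truncates toward zero
    assert_eq!(checked_f64_to_i32(f64::NAN), Err(Trap::BadConversionToInteger));
    assert_eq!(checked_f64_to_i32(3e9), Err(Trap::IntegerOverflow));
}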
|
||||
|
||||
// Do the conversion.
|
||||
|
@ -2050,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
|
@ -2060,7 +2144,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Packss
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
| Opcode::X86Vcvtudq2ps
|
||||
|
@ -2069,8 +2152,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||
}
|
||||
|
||||
Opcode::Iabs => unimplemented!(),
|
||||
Opcode::AvgRound => unimplemented!(),
|
||||
Opcode::Iabs => unimplemented!(),
|
||||
Opcode::Snarrow
|
||||
| Opcode::Unarrow
|
||||
| Opcode::SwidenLow
|
||||
| Opcode::SwidenHigh
|
||||
| Opcode::UwidenLow
|
||||
| Opcode::UwidenHigh => unimplemented!(),
|
||||
Opcode::TlsValue => unimplemented!(),
|
||||
}
|
||||
|
||||
|
@ -2307,7 +2396,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
|
||||
// Bounds-check and branch to default.
|
||||
// Bounds-check, leaving condition codes for JTSequence's
|
||||
// branch to default target below.
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: ALUOp::SubS32,
|
||||
|
@ -2324,14 +2414,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
}
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: default_target.clone(),
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
});
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
//
|
||||
// b.hs default
|
||||
// adr rA, jt
|
||||
// ldrsw rB, [rA, rIndex, UXTW 2]
|
||||
// add rA, rA, rB
|
||||
|
@ -2350,6 +2436,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
.skip(1)
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.collect();
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
let targets_for_term: Vec<MachLabel> = targets.to_vec();
|
||||
ctx.emit(Inst::JTSequence {
|
||||
ridx,
|
||||
|
@ -2357,7 +2444,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rtmp2,
|
||||
info: Box::new(JTSequenceInfo {
|
||||
targets: jt_targets,
|
||||
targets_for_term: targets_for_term,
|
||||
default_target,
|
||||
targets_for_term,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
|
|
@ -121,7 +121,11 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
|
|||
match triple.architecture {
|
||||
Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple),
|
||||
Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
|
||||
isa_builder!(x86, "x86", triple)
|
||||
if cfg!(feature = "x64") {
|
||||
isa_builder!(x64, "x64", triple)
|
||||
} else {
|
||||
isa_builder!(x86, "x86", triple)
|
||||
}
|
||||
}
|
||||
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
|
||||
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//! System V ABI unwind information.
|
||||
//! Windows x64 ABI unwind information.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
|
@ -57,10 +57,6 @@ pub(crate) enum UnwindCode {
|
|||
offset: u8,
|
||||
size: u32,
|
||||
},
|
||||
SetFramePointer {
|
||||
offset: u8,
|
||||
sp_offset: u8,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnwindCode {
|
||||
|
@ -69,7 +65,6 @@ impl UnwindCode {
|
|||
PushNonvolatileRegister = 0,
|
||||
LargeStackAlloc = 1,
|
||||
SmallStackAlloc = 2,
|
||||
SetFramePointer = 3,
|
||||
SaveXmm128 = 8,
|
||||
SaveXmm128Far = 9,
|
||||
}
|
||||
|
@ -85,13 +80,13 @@ impl UnwindCode {
|
|||
stack_offset,
} => {
writer.write_u8(*offset);
let stack_offset = stack_offset / 16;
if stack_offset <= core::u16::MAX as u32 {
let scaled_stack_offset = stack_offset / 16;
if scaled_stack_offset <= core::u16::MAX as u32 {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
writer.write_u16::<LittleEndian>(stack_offset as u16);
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
} else {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
writer.write_u16::<LittleEndian>(stack_offset as u16);
writer.write_u16::<LittleEndian>(*stack_offset as u16);
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
}
}
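The fix above scales the offset once into scaled_stack_offset and then uses the scaled and unscaled values consistently: the short SAVE_XMM128 form stores offset/16 in a u16, while the Far form stores the full unscaled 32-bit offset split across two u16 halves. A sketch of that choice (hypothetical helper, not the crate's API):

// Which Windows x64 unwind code a 16-byte-aligned XMM save offset needs.
enum SaveForm {
    Short(u16), // SAVE_XMM128: offset / 16 must fit in a u16
    Far(u32),   // SAVE_XMM128_FAR: full 32-bit offset, split into two u16s
}

fn save_xmm128_form(stack_offset: u32) -> SaveForm {
    let scaled = stack_offset / 16;
    if scaled <= u16::MAX as u32 {
        SaveForm::Short(scaled as u16)
    } else {
        SaveForm::Far(stack_offset)
    }
}

fn main() {
    assert!(matches!(save_xmm128_form(0x4_0000), SaveForm::Short(0x4000)));
    assert!(matches!(save_xmm128_form(0x20_0000), SaveForm::Far(_)));
}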
|
||||
|
@ -113,10 +108,6 @@ impl UnwindCode {
|
|||
writer.write_u32::<LittleEndian>(*size);
|
||||
}
|
||||
}
|
||||
Self::SetFramePointer { offset, sp_offset } => {
|
||||
writer.write_u8(*offset);
|
||||
writer.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,9 @@ use log::trace;
|
|||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
use std::mem;
|
||||
|
||||
use crate::binemit::Stackmap;
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::isa::{x64::inst::*, CallConv};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
@ -39,7 +40,7 @@ struct ABISig {
|
|||
/// Index in `args` of the stack-return-value-area argument.
|
||||
stack_ret_arg: Option<usize>,
|
||||
/// Calling convention used.
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
|
@ -64,7 +65,7 @@ pub(crate) struct X64ABIBody {
|
|||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
|
@ -92,7 +93,11 @@ fn in_vec_reg(ty: types::Type) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("int args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
|
@ -104,7 +109,11 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("float args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
|
@ -118,19 +127,39 @@ fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
fn get_intreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::rax())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
fn get_fltreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::xmm0())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -146,10 +175,39 @@ fn is_callee_save_systemv(r: RealReg) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
fn is_callee_save_baldrdash(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => {
if r.get_hw_encoding() as u8 == ENC_R14 {
// r14 is the WasmTlsReg and is preserved implicitly.
false
} else {
// Defer to native for the other ones.
is_callee_save_systemv(r)
}
}
RegClass::V128 => false,
_ => unimplemented!(),
}
}

fn get_callee_saves(call_conv: &CallConv, regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
match call_conv {
CallConv::BaldrdashSystemV => regs
.into_iter()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
.into_iter()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => todo!("windows fastcall"),
CallConv::Probestack => todo!("probestack?"),
}
}
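The prologue only needs to save the clobbered registers that the calling convention marks callee-saved; Baldrdash additionally excludes r14 (the WasmTlsReg), which is preserved implicitly, per the comment above. A simplified model of the SysV filter (register names as strings, purely illustrative):

// SysV x86-64 callee-saved integer registers, by name.
fn is_callee_save_sysv(reg: &str) -> bool {
    matches!(reg, "rbx" | "rbp" | "r12" | "r13" | "r14" | "r15")
}

fn main() {
    // Only the clobbered regs that are callee-saved need prologue saves.
    let clobbered = ["rax", "rbx", "r12", "r11"];
    let to_save: Vec<_> = clobbered.iter().filter(|r| is_callee_save_sysv(r)).collect();
    assert_eq!(to_save, [&"rbx", &"r12"]);
}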
|
||||
|
||||
impl X64ABIBody {
|
||||
|
@ -159,7 +217,7 @@ impl X64ABIBody {
|
|||
|
||||
let call_conv = f.signature.call_conv;
|
||||
debug_assert!(
|
||||
call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
call_conv == CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
"unsupported or unimplemented calling convention {}",
|
||||
call_conv
|
||||
);
|
||||
|
@ -194,7 +252,6 @@ impl X64ABIBody {
|
|||
if self.call_conv.extends_baldrdash() {
|
||||
let num_words = self.flags.baldrdash_prologue_words() as i64;
|
||||
debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
|
||||
debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
|
||||
num_words * 8
|
||||
} else {
|
||||
16 // frame pointer + return address.
|
||||
|
@ -268,7 +325,18 @@ impl ABIBody for X64ABIBody {
|
|||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
if let Some(i) = self.sig.stack_ret_arg {
|
||||
let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
|
||||
trace!(
|
||||
"gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
|
||||
inst,
|
||||
self.ret_area_ptr.unwrap().to_reg()
|
||||
);
|
||||
Some(inst)
|
||||
} else {
|
||||
trace!("gen_retval_area_setup: not needed");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
|
@ -294,15 +362,17 @@ impl ABIBody for X64ABIBody {
|
|||
(ArgumentExtension::Uext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movzx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)),
|
||||
|
@ -326,6 +396,7 @@ impl ABIBody for X64ABIBody {
|
|||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
|
@ -333,6 +404,7 @@ impl ABIBody for X64ABIBody {
|
|||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
|
@ -387,12 +459,36 @@ impl ABIBody for X64ABIBody {
|
|||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!("load_spillslot")
|
||||
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
|
||||
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let spill_off = islot * 8;
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
load_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
into_reg,
|
||||
ty,
|
||||
)
|
||||
}
|
||||
|
||||
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_spillslot")
|
||||
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
|
||||
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let spill_off = islot * 8;
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
store_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
from_reg,
|
||||
ty,
|
||||
)
|
||||
}
|
||||
|
||||
fn spillslots_to_stackmap(&self, _slots: &[SpillSlot], _state: &EmitState) -> Stackmap {
|
||||
unimplemented!("spillslots_to_stackmap")
|
||||
}
|
||||
|
||||
fn gen_prologue(&mut self) -> Vec<Inst> {
|
||||
|
@ -412,7 +508,7 @@ impl ABIBody for X64ABIBody {
|
|||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
let callee_saved_used: usize = clobbered
|
||||
.iter()
|
||||
.map(|reg| match reg.to_reg().get_class() {
|
||||
|
@ -456,7 +552,7 @@ impl ABIBody for X64ABIBody {
|
|||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
|
@ -486,7 +582,7 @@ impl ABIBody for X64ABIBody {
|
|||
// Undo what we did in the prologue.
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for wreg in clobbered.into_iter().rev() {
|
||||
let rreg = wreg.to_reg();
|
||||
match rreg.get_class() {
|
||||
|
@ -533,6 +629,10 @@ impl ABIBody for X64ABIBody {
|
|||
.expect("frame size not computed before prologue generation") as u32
|
||||
}
|
||||
|
||||
fn stack_args_size(&self) -> u32 {
|
||||
unimplemented!("I need to be computed!")
|
||||
}
|
||||
|
||||
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
|
@ -543,16 +643,43 @@ impl ABIBody for X64ABIBody {
|
|||
}
|
||||
}
|
||||
|
||||
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
|
||||
let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty);
|
||||
self.store_spillslot(to_slot, ty, from_reg.to_reg())
|
||||
}
|
||||
|
||||
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
fn gen_reload(
|
||||
&self,
|
||||
to_reg: Writable<RealReg>,
|
||||
from_slot: SpillSlot,
|
||||
ty: Option<Type>,
|
||||
) -> Inst {
|
||||
let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty);
|
||||
self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
|
||||
}
|
||||
}
|
||||
|
||||
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
/// Return a type either from an optional type hint, or if not, from the default
|
||||
/// type associated with the given register's class. This is used to generate
|
||||
/// loads/spills appropriately given the type of value loaded/stored (which may
|
||||
/// be narrower than the spillslot). We usually have the type because the
|
||||
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
|
||||
/// vreg's type. However, the regalloc *can* request a spill/reload without an
|
||||
/// associated vreg when needed to satisfy a safepoint (which requires all
|
||||
/// ref-typed values, even those in real registers in the original vcode, to be
|
||||
/// in spillslots).
|
||||
fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
|
||||
match (ty, r.get_class()) {
|
||||
// If the type is provided
|
||||
(Some(t), _) => t,
|
||||
// If no type is provided, this should be a register spill for a
|
||||
// safepoint, so we only expect I64 (integer) registers.
|
||||
(None, RegClass::I64) => I64,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_caller_saves(call_conv: CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = Vec::new();
|
||||
|
||||
// Systemv calling convention:
|
||||
|
@ -567,6 +694,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
|||
caller_saved.push(Writable::from_reg(regs::r10()));
|
||||
caller_saved.push(Writable::from_reg(regs::r11()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||
// Not r14; implicitly preserved in the entry.
|
||||
caller_saved.push(Writable::from_reg(regs::r15()));
|
||||
caller_saved.push(Writable::from_reg(regs::rbx()));
|
||||
}
|
||||
|
||||
// - XMM: all the registers!
|
||||
caller_saved.push(Writable::from_reg(regs::xmm0()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm1()));
|
||||
|
@ -585,10 +720,6 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
|||
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
todo!("add the baldrdash caller saved")
|
||||
}
|
||||
|
||||
caller_saved
|
||||
}
|
||||
|
||||
|
@ -615,7 +746,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
|
|||
}
|
||||
|
||||
/// Try to fill a Baldrdash register, returning it if it was found.
|
||||
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
if call_conv.extends_baldrdash() {
|
||||
match ¶m.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
|
@ -649,16 +780,13 @@ enum ArgsOrRets {
|
|||
/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the
|
||||
/// index of the extra synthetic arg that was added.
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
|
||||
// XXX assume SystemV at the moment.
|
||||
debug_assert!(!is_baldrdash, "baldrdash nyi");
|
||||
|
||||
let mut next_gpr = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
|
@ -692,8 +820,8 @@ fn compute_arg_locs(
|
|||
|
||||
let (next_reg, candidate) = if intreg {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr),
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::I64)
|
||||
|
@ -701,8 +829,8 @@ fn compute_arg_locs(
|
|||
(&mut next_gpr, candidate)
|
||||
} else {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_vreg),
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::V128)
|
||||
|
@ -735,7 +863,7 @@ fn compute_arg_locs(
|
|||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
||||
ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64));
|
||||
|
@ -828,7 +956,7 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
|
|||
}
|
||||
}
|
||||
|
||||
fn load_stack(mem: Amode, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
let ext_mode = match ty {
|
||||
types::B1 | types::B8 | types::I8 => Some(ExtMode::BQ),
|
||||
types::B16 | types::I16 => Some(ExtMode::WQ),
|
||||
|
@ -839,13 +967,19 @@ fn load_stack(mem: Amode, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
|||
_ => unimplemented!("load_stack({})", ty),
|
||||
};
|
||||
|
||||
let mem = mem.into();
|
||||
match ext_mode {
|
||||
Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg),
|
||||
None => Inst::mov64_m_r(mem, into_reg),
|
||||
Some(ext_mode) => Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::mem(mem),
|
||||
into_reg,
|
||||
/* infallible load */ None,
|
||||
),
|
||||
None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
|
||||
}
|
||||
}
|
||||
|
||||
fn store_stack(mem: Amode, from_reg: Reg, ty: Type) -> Inst {
|
||||
fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst {
|
||||
let (is_int, size) = match ty {
|
||||
types::B1 | types::B8 | types::I8 => (true, 1),
|
||||
types::B16 | types::I16 => (true, 2),
|
||||
|
@ -855,8 +989,9 @@ fn store_stack(mem: Amode, from_reg: Reg, ty: Type) -> Inst {
|
|||
types::F64 => (false, 8),
|
||||
_ => unimplemented!("store_stack({})", ty),
|
||||
};
|
||||
let mem = mem.into();
|
||||
if is_int {
|
||||
Inst::mov_r_m(size, from_reg, mem)
|
||||
Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
|
||||
} else {
|
||||
unimplemented!("f32/f64 store_stack");
|
||||
}
|
||||
|
|
|
@ -27,6 +27,10 @@ pub enum Amode {
|
|||
index: Reg,
|
||||
shift: u8, /* 0 .. 3 only */
|
||||
},
|
||||
|
||||
/// sign-extend-32-to-64(Immediate) + RIP (instruction pointer).
|
||||
/// To wit: not supported in 32-bits mode.
|
||||
RipRelative { target: BranchTarget },
|
||||
}
|
||||
|
||||
impl Amode {
|
||||
|
@ -47,6 +51,10 @@ impl Amode {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn rip_relative(target: BranchTarget) -> Self {
|
||||
Self::RipRelative { target }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
|
@ -57,6 +65,9 @@ impl Amode {
|
|||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
Amode::RipRelative { .. } => {
|
||||
// RIP isn't involved in regalloc.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -79,6 +90,13 @@ impl ShowWithRRU for Amode {
|
|||
index.show_rru(mb_rru),
|
||||
1 << shift
|
||||
),
|
||||
Amode::RipRelative { ref target } => format!(
|
||||
"{}(%rip)",
|
||||
match target {
|
||||
BranchTarget::Label(label) => format!("label{}", label.get()),
|
||||
BranchTarget::ResolvedOffset(offset) => offset.to_string(),
|
||||
}
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -181,7 +199,7 @@ impl RegMemImm {
|
|||
match self {
|
||||
Self::Reg { reg } => collector.add_use(*reg),
|
||||
Self::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
Self::Imm { simm32: _ } => {}
|
||||
Self::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -216,12 +234,11 @@ impl RegMem {
|
|||
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
|
||||
Self::Mem { addr: addr.into() }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RegMem::Reg { reg } => collector.add_use(*reg),
|
||||
RegMem::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +251,7 @@ impl ShowWithRRU for RegMem {
|
|||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RegMem::Mem { addr } => addr.show_rru(mb_rru),
|
||||
RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -265,9 +282,32 @@ impl fmt::Debug for AluRmiROpcode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for AluRmiROpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for AluRmiROpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum UnaryRmROpcode {
|
||||
/// Bit-scan reverse.
|
||||
Bsr,
|
||||
/// Bit-scan forward.
|
||||
Bsf,
|
||||
}
|
||||
|
||||
impl fmt::Debug for UnaryRmROpcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
|
||||
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for UnaryRmROpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -428,9 +468,9 @@ impl fmt::Debug for SseOpcode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for SseOpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for SseOpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -479,34 +519,65 @@ impl fmt::Debug for ExtMode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for ExtMode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for ExtMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
#[derive(Clone)]
pub enum ShiftKind {
Left,
RightZ,
RightS,
ShiftLeft,
/// Inserts zeros in the most significant bits.
ShiftRightLogical,
/// Replicates the sign bit in the most significant bits.
ShiftRightArithmetic,
RotateLeft,
RotateRight,
}

impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
ShiftKind::Left => "shl",
ShiftKind::RightZ => "shr",
ShiftKind::RightS => "sar",
ShiftKind::ShiftLeft => "shl",
ShiftKind::ShiftRightLogical => "shr",
ShiftKind::ShiftRightArithmetic => "sar",
ShiftKind::RotateLeft => "rol",
ShiftKind::RotateRight => "ror",
};
write!(fmt, "{}", name)
}
}

impl ToString for ShiftKind {
fn to_string(&self) -> String {
format!("{:?}", self)
impl fmt::Display for ShiftKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}
|
||||
|
||||
/// Which kind of division or remainder instruction this is.
|
||||
#[derive(Clone)]
|
||||
pub enum DivOrRemKind {
|
||||
SignedDiv,
|
||||
UnsignedDiv,
|
||||
SignedRem,
|
||||
UnsignedRem,
|
||||
}
|
||||
|
||||
impl DivOrRemKind {
|
||||
pub(crate) fn is_signed(&self) -> bool {
|
||||
match self {
|
||||
DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_div(&self) -> bool {
|
||||
match self {
|
||||
DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -532,7 +603,7 @@ pub enum CC {
|
|||
|
||||
/// <= unsigned
|
||||
BE = 6,
|
||||
/// > unsigend
|
||||
/// > unsigned
|
||||
NBE = 7,
|
||||
|
||||
/// negative
|
||||
|
@ -621,9 +692,9 @@ impl fmt::Debug for CC {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for CC {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for CC {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use log::debug;
|
||||
use regalloc::Reg;
|
||||
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::binemit::Reloc;
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
|
@ -262,6 +264,34 @@ fn emit_std_enc_mem(
|
|||
panic!("ImmRegRegShift");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::RipRelative { ref target } => {
|
||||
// First, the REX byte, with REX.B = 0.
|
||||
rex.emit_two_op(sink, enc_g, 0);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// RIP-relative is mod=00, rm=101.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
|
||||
|
||||
match *target {
|
||||
BranchTarget::Label(label) => {
|
||||
let offset = sink.cur_offset();
|
||||
sink.use_label_at_offset(offset, label, LabelUse::JmpRel32);
|
||||
sink.put4(0);
|
||||
}
|
||||
BranchTarget::ResolvedOffset(offset) => {
|
||||
let offset =
|
||||
u32::try_from(offset).expect("rip-relative can't hold >= U32_MAX values");
|
||||
sink.put4(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -340,6 +370,16 @@ fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits a one-way conditional jump if CC is set (true).
fn one_way_jmp(sink: &mut MachBuffer<Inst>, cc: CC, label: MachLabel) {
let cond_start = sink.cur_offset();
let cond_disp_off = cond_start + 2;
sink.use_label_at_offset(cond_disp_off, label, LabelUse::JmpRel32);
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(0x0);
}
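The helper emits the two-byte Jcc opcode (0x0F, 0x80 + cc) followed by a rel32 placeholder that the label fixup later patches; the displacement field starts two bytes into the instruction, hence cond_start + 2. A standalone sketch of the byte layout (illustrative, using a plain Vec rather than the MachBuffer API):

// Encode `jcc rel32` into a byte buffer: 0F 8x imm32 (little endian).
fn encode_jcc_rel32(buf: &mut Vec<u8>, cc_enc: u8, rel32: i32) {
    buf.push(0x0F);
    buf.push(0x80 + cc_enc);
    buf.extend_from_slice(&rel32.to_le_bytes());
}

fn main() {
    let mut buf = Vec::new();
    // CC encoding 4 is Z/E on x86; jump forward 16 bytes.
    encode_jcc_rel32(&mut buf, 4, 16);
    assert_eq!(buf, vec![0x0F, 0x84, 0x10, 0x00, 0x00, 0x00]);
}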
|
||||
|
||||
/// The top-level emit function.
|
||||
///
|
||||
/// Important! Do not add improved (shortened) encoding cases to existing
|
||||
|
@ -395,7 +435,7 @@ fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
|||
pub(crate) fn emit(
|
||||
inst: &Inst,
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
_flags: &settings::Flags,
|
||||
flags: &settings::Flags,
|
||||
state: &mut EmitState,
|
||||
) {
|
||||
match inst {
|
||||
|
@ -516,6 +556,226 @@ pub(crate) fn emit(
|
|||
}
|
||||
}
|
||||
|
||||
Inst::UnaryRmR { size, op, src, dst } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let (opcode, num_opcodes) = match op {
|
||||
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
||||
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
divisor,
|
||||
loc,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
sink.add_trap(*loc, TrapCode::IntegerDivisionByZero);
|
||||
|
||||
let subopcode = if *signed { 7 } else { 6 };
|
||||
match divisor {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
}
|
||||
RegMem::Mem { addr: src } => emit_std_enc_mem(
|
||||
sink,
|
||||
prefix,
|
||||
0xF7,
|
||||
1,
|
||||
subopcode,
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let subopcode = if *signed { 5 } else { 4 };
|
||||
match rhs {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
}
|
||||
RegMem::Mem { addr: src } => emit_std_enc_mem(
|
||||
sink,
|
||||
prefix,
|
||||
0xF7,
|
||||
1,
|
||||
subopcode,
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::SignExtendRaxRdx { size } => {
|
||||
match size {
|
||||
2 => sink.put1(0x66),
|
||||
4 => {}
|
||||
8 => sink.put1(0x48),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
sink.put1(0x99);
|
||||
}
|
||||
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
kind,
|
||||
size,
|
||||
divisor,
|
||||
loc,
|
||||
tmp,
|
||||
} => {
|
||||
// Generates the following code sequence:
//
// ;; check divide by zero:
// cmp 0 %divisor
// jnz $after_trap
// ud2
// $after_trap:
//
// ;; for signed modulo/div:
// cmp -1 %divisor
// jnz $do_op
// ;; for signed modulo, result is 0
// mov #0, %rdx
// j $done
// ;; for signed div, check for integer overflow against INT_MIN of the right size
// cmp INT_MIN, %rax
// jnz $do_op
// ud2
//
// $do_op:
// ;; if signed
// cdq ;; sign-extend from rax into rdx
// ;; else
// mov #0, %rdx
// idiv %divisor
//
// $done:
|
||||
debug_assert!(flags.avoid_div_traps());
|
||||
|
||||
// Check if the divisor is zero, first.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let (do_op, done_label) = if kind.is_signed() {
|
||||
// Now check if the divisor is -1.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let do_op = sink.get_label();
|
||||
|
||||
// If not equal, jump to do-op.
|
||||
one_way_jmp(sink, CC::NZ, do_op);
|
||||
|
||||
// Here, divisor == -1.
|
||||
if !kind.is_div() {
|
||||
// x % -1 = 0; put the result into the destination, $rdx.
|
||||
let done_label = sink.get_label();
|
||||
|
||||
let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx()));
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::jmp_known(BranchTarget::Label(done_label));
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
(Some(do_op), Some(done_label))
|
||||
} else {
|
||||
// Check for integer overflow.
|
||||
if *size == 8 {
|
||||
let tmp = tmp.expect("temporary for i64 sdiv");
|
||||
|
||||
let inst = Inst::imm_r(true, 0x8000000000000000, tmp);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
} else {
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
// If not equal, jump over the trap.
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow, *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
(Some(do_op), None)
|
||||
}
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
if let Some(do_op) = do_op {
|
||||
sink.bind_label(do_op);
|
||||
}
|
||||
|
||||
// Fill in the high parts:
|
||||
if kind.is_signed() {
|
||||
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
|
||||
let inst = Inst::sign_extend_rax_to_rdx(*size);
|
||||
inst.emit(sink, flags, state);
|
||||
} else {
|
||||
// zero for unsigned opcodes.
|
||||
let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx()));
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Lowering takes care of moving the result back into the right register, see comment
|
||||
// there.
|
||||
|
||||
if let Some(done) = done_label {
|
||||
sink.bind_label(done);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
|
@ -546,7 +806,12 @@ pub(crate) fn emit(
|
|||
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
}
|
||||
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovZX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
|
@ -588,27 +853,45 @@ pub(crate) fn emit(
|
|||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov64_M_R { src, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
RexFlags::set_w(),
|
||||
),
|
||||
Inst::Mov64_M_R { src, dst, srcloc } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
|
||||
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
|
@ -620,7 +903,12 @@ pub(crate) fn emit(
|
|||
RexFlags::set_w(),
|
||||
),
|
||||
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovSX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
|
@ -654,21 +942,41 @@ pub(crate) fn emit(
|
|||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov_R_M { size, src, dst } => {
|
||||
Inst::Mov_R_M {
|
||||
size,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let dst = &dst.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
|
@ -736,9 +1044,11 @@ pub(crate) fn emit(
|
|||
} => {
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
ShiftKind::RotateLeft => 0,
|
||||
ShiftKind::RotateRight => 1,
|
||||
ShiftKind::ShiftLeft => 4,
|
||||
ShiftKind::ShiftRightLogical => 5,
|
||||
ShiftKind::ShiftRightArithmetic => 7,
|
||||
};
|
||||
|
||||
let rex = if *is_64 {
|
||||
|
@ -849,6 +1159,30 @@ pub(crate) fn emit(
|
|||
);
|
||||
}
|
||||
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
src,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!("invalid size spec for cmove"),
|
||||
};
|
||||
let opcode = 0x0F40 + cc.get_enc() as u32;
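            // CMOVcc is 0F 40+cc, so e.g. CC::Z (encoding 4) yields 0x0F44, i.e. CMOVZ.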
            match src {
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
                }
                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex_flags);
                }
            }
        }

        Inst::Push64 { src } => {
            match src {
                RegMemImm::Reg { reg } => {

@@ -1027,10 +1361,112 @@ pub(crate) fn emit(
            }
        }

        Inst::JmpTableSeq {
            idx,
            tmp1,
            tmp2,
            ref targets,
            default_target,
            ..
        } => {
            // This sequence is *one* instruction in the vcode, and is expanded only here at
            // emission time, because we cannot allow the regalloc to insert spills/reloads in
            // the middle; we depend on hardcoded PC-rel addressing below.
            //
            // We don't have to worry about emitting islands, because the only label-use type has a
            // maximum range of 2 GB. If we later consider using shorter-range label references,
            // this will need to be revisited.

            // Save index in a tmp (the live range of ridx only goes to start of this
            // sequence; rtmp1 or rtmp2 may overwrite it).

            // We generate the following sequence:
            // ;; generated by lowering: cmp #jmp_table_size, %idx
            // jnb $default_target
            // movl %idx, %tmp2
            // lea start_of_jump_table_offset(%rip), %tmp1
            // movzlq [%tmp1, %tmp2], %tmp2
            // addq %tmp2, %tmp1
            // j *%tmp1
            // $start_of_jump_table:
            // -- jump table entries
            let default_label = match default_target {
                BranchTarget::Label(label) => label,
                _ => unreachable!(),
            };
            one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size

            // Copy the index (and make sure to clear the high 32-bits lane of tmp2).
            let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2, None);
            inst.emit(sink, flags, state);

            // Load base address of jump table.
            let start_of_jumptable = sink.get_label();
            let inst = Inst::lea(
                Amode::rip_relative(BranchTarget::Label(start_of_jumptable)),
                *tmp1,
            );
            inst.emit(sink, flags, state);

            // Load value out of jump table.
            let inst = Inst::movzx_rm_r(
                ExtMode::LQ,
                RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
                *tmp2,
                None,
            );
            inst.emit(sink, flags, state);

            // Add base of jump table to jump-table-sourced block offset.
            let inst = Inst::alu_rmi_r(
                true, /* is_64 */
                AluRmiROpcode::Add,
                RegMemImm::reg(tmp2.to_reg()),
                *tmp1,
            );
            inst.emit(sink, flags, state);

            // Branch to computed address.
            let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg()));
            inst.emit(sink, flags, state);

            // Emit jump table (table of 32-bit offsets).
            sink.bind_label(start_of_jumptable);
            let jt_off = sink.cur_offset();
            for &target in targets.iter() {
                let word_off = sink.cur_offset();
                // off_into_table is an addend here embedded in the label to be later patched at
                // the end of codegen. The offset is initially relative to this jump table entry;
                // with the extra addend, it'll be relative to the jump table's start, after
                // patching.
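                // Concretely: entry i lives at jt_off + 4*i and initially holds 4*i;
                // after PCRel32 patching it holds (target - jt_off), which is exactly
                // what the movzlq/addq sequence above adds to the table base.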
                let off_into_table = word_off - jt_off;
                sink.use_label_at_offset(word_off, target.as_label().unwrap(), LabelUse::PCRel32);
                sink.put4(off_into_table);
            }
        }

        Inst::TrapIf {
            cc,
            trap_code,
            srcloc,
        } => {
            let else_label = sink.get_label();

            // Jump over if the invert of CC is set (i.e. CC is not set).
            one_way_jmp(sink, cc.invert(), else_label);

            // Trap!
            let inst = Inst::trap(*srcloc, *trap_code);
            inst.emit(sink, flags, state);

            sink.bind_label(else_label);
        }

        Inst::XMM_Mov_RM_R {
            op,
            src: src_e,
            dst: reg_g,
            srcloc,
        } => {
            let rex = RexFlags::clear_w();
            let (prefix, opcode) = match op {

@@ -1045,9 +1481,12 @@ pub(crate) fn emit(
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
                }

                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    if let Some(srcloc) = *srcloc {
                        // Register the offset at which the actual load instruction starts.
                        sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                    }
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
                }
            }

@@ -1075,14 +1514,19 @@ pub(crate) fn emit(
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
                }

                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
                }
            }
        }
        Inst::XMM_Mov_R_M { op, src, dst } => {

        Inst::XMM_Mov_R_M {
            op,
            src,
            dst,
            srcloc,
        } => {
            let rex = RexFlags::clear_w();
            let (prefix, opcode) = match op {
                SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E),

@@ -1091,8 +1535,32 @@ pub(crate) fn emit(
            };

            let dst = &dst.finalize(state);
            if let Some(srcloc) = *srcloc {
                // Register the offset at which the actual load instruction starts.
                sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
            }
            emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex);
        }

        Inst::LoadExtName {
            dst,
            name,
            offset,
            srcloc,
        } => {
            // The full address can be encoded in the register, with a relocation.
            // Generates: movabsq $name, %dst
            let enc_dst = int_reg_enc(dst.to_reg());
            sink.put1(0x48 | ((enc_dst >> 3) & 1));
            sink.put1(0xB8 | (enc_dst & 7));
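            // This is the classic movabsq encoding: REX.W (0x48, plus REX.B for
            // r8..r15) then B8+rd and an 8-byte immediate; e.g. dst = %rcx (enc 1)
            // emits 48 B9 <imm64>.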
            sink.add_reloc(*srcloc, Reloc::Abs8, name, *offset);
            if flags.emit_all_ones_funcaddrs() {
                sink.put8(u64::max_value());
            } else {
                sink.put8(0);
            }
        }

        Inst::Hlt => {
            sink.put1(0xcc);
        }
File diff suppressed because it is too large

@@ -4,16 +4,17 @@
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

use alloc::boxed::Box;
use alloc::vec::Vec;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};

use regalloc::RegUsageCollector;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
use smallvec::SmallVec;

use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::types::*;
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
use crate::settings::Flags;

@@ -49,6 +50,49 @@ pub enum Inst {
        dst: Writable<Reg>,
    },

    /// Instructions on GPR that only read src and define dst (dst is not modified): bsr, etc.
    UnaryRmR {
        size: u8, // 2, 4 or 8
        op: UnaryRmROpcode,
        src: RegMem,
        dst: Writable<Reg>,
    },

    /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
    Div {
        size: u8, // 1, 2, 4 or 8
        signed: bool,
        divisor: RegMem,
        loc: SourceLoc,
    },

    /// The high bits (RDX) of an (un)signed multiply: RDX:RAX := RAX * rhs.
    MulHi { size: u8, signed: bool, rhs: RegMem },

    /// A synthetic sequence to implement the right inline checks for remainder and division,
    /// assuming the dividend is in %rax.
    /// Puts the result back into %rax if is_div, %rdx if !is_div, to mimic what the div
    /// instruction does.
    /// The generated code sequence is described in the emit function's match arm for this
    /// instruction.
    ///
    /// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem
    /// with the temporary and divisor registers. Make sure to zero %rdx right before this
    /// instruction, or you might run into regalloc failures where %rdx is live before its first
    /// def!
    CheckedDivOrRemSeq {
        kind: DivOrRemKind,
        size: u8,
        divisor: Reg,
        tmp: Option<Writable<Reg>>,
        loc: SourceLoc,
    },

    /// Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo)
    SignExtendRaxRdx {
        size: u8, // 1, 2, 4 or 8
    },

    /// Constant materialization: (imm32 imm64) reg.
    /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
    Imm_R {

@@ -71,12 +115,16 @@ pub enum Inst {
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// A plain 64-bit integer load, since MovZX_RM_R can't represent that.
    Mov64_M_R {
        src: SyntheticAmode,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Loads the memory address of addr into dst.

@@ -90,6 +138,8 @@ pub enum Inst {
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Integer stores: mov (b w l q) reg addr.

@@ -97,6 +147,8 @@ pub enum Inst {
        size: u8, // 1, 2, 4 or 8.
        src: Reg,
        dst: SyntheticAmode,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Arithmetic shifts: (shl shr sar) (l q) imm reg.

@@ -118,6 +170,16 @@ pub enum Inst {
    /// Materializes the requested condition code in the destination reg.
    Setcc { cc: CC, dst: Writable<Reg> },

    /// Integer conditional move.
    /// Overwrites the destination register.
    Cmove {
        /// Possible values are 2, 4 or 8. Checked in the related factory.
        size: u8,
        cc: CC,
        src: RegMem,
        dst: Writable<Reg>,
    },

    // =====================================
    // Stack manipulation.
    /// pushq (reg addr imm)

@@ -143,6 +205,8 @@ pub enum Inst {
        op: SseOpcode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// mov reg addr (good for all memory stores from xmm registers)

@@ -150,6 +214,8 @@ pub enum Inst {
        op: SseOpcode,
        src: Reg,
        dst: SyntheticAmode,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    // =====================================

@@ -190,15 +256,42 @@ pub enum Inst {
        not_taken: BranchTarget,
    },

    /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale).
    /// The generated code sequence is described in the emit function's match arm for this
    /// instruction.
    JmpTableSeq {
        idx: Reg,
        tmp1: Writable<Reg>,
        tmp2: Writable<Reg>,
        default_target: BranchTarget,
        targets: Vec<BranchTarget>,
        targets_for_term: Vec<MachLabel>,
    },

    /// Indirect jump: jmpq (reg mem).
    JmpUnknown { target: RegMem },

    /// Traps if the condition code is set.
    TrapIf {
        cc: CC,
        trap_code: TrapCode,
        srcloc: SourceLoc,
    },

    /// A debug trap.
    Hlt,

    /// An instruction that will always trigger the illegal instruction exception.
    Ud2 { trap_info: (SourceLoc, TrapCode) },

    /// Loads an external symbol in a register, with a relocation: movabsq $name, dst
    LoadExtName {
        dst: Writable<Reg>,
        name: Box<ExternalName>,
        srcloc: SourceLoc,
        offset: i64,
    },

    // =====================================
    // Meta-instructions generating no code.
    /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This

@@ -206,15 +299,13 @@ pub enum Inst {
    VirtualSPOffsetAdj { offset: i64 },
}

// Handy constructors for Insts.

// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
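// E.g. 0x0000_0000_7fff_ffff does (its low 32 bits sign-extend back to the same
// value), while 0x0000_0000_8000_0000 does not (it would sign-extend to
// 0xffff_ffff_8000_0000).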
pub(crate) fn low32willSXto64(x: u64) -> bool {
pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        debug_assert!(len <= 16);

@@ -236,10 +327,45 @@ impl Inst {
        }
    }

    pub(crate) fn unary_rm_r(
        size: u8,
        op: UnaryRmROpcode,
        src: RegMem,
        dst: Writable<Reg>,
    ) -> Self {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        debug_assert!(size == 8 || size == 4 || size == 2);
        Self::UnaryRmR { size, op, src, dst }
    }

    pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        Inst::Div {
            size,
            signed,
            divisor,
            loc,
        }
    }

    pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        Inst::MulHi { size, signed, rhs }
    }

    pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2);
        Inst::SignExtendRaxRdx { size }
    }

    pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        if !dst_is_64 {
            debug_assert!(low32willSXto64(simm64));
            debug_assert!(
                low32_will_sign_extend_to_64(simm64),
                "{} won't sign-extend to 64 bits!",
                simm64
            );
        }
        Inst::Imm_R {
            dst_is_64,

@@ -254,9 +380,19 @@ impl Inst {
        Inst::Mov_R_R { is_64, src, dst }
    }

    pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
    pub(crate) fn xmm_mov_rm_r(
        op: SseOpcode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::V128);
        Inst::XMM_Mov_RM_R { op, src, dst }
        Inst::XMM_Mov_RM_R {
            op,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {

@@ -264,37 +400,77 @@ impl Inst {
        Inst::XMM_RM_R { op, src, dst }
    }

    pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
    pub(crate) fn xmm_mov_r_m(
        op: SseOpcode,
        src: Reg,
        dst: impl Into<SyntheticAmode>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(src.get_class() == RegClass::V128);
        Inst::XMM_Mov_R_M {
            op,
            src,
            dst: dst.into(),
            srcloc,
        }
    }

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
    pub(crate) fn movzx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovZX_RM_R { ext_mode, src, dst }
        Inst::MovZX_RM_R {
            ext_mode,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
    pub(crate) fn movsx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovSX_RM_R {
            ext_mode,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn mov64_m_r(
        src: impl Into<SyntheticAmode>,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Mov64_M_R {
            src: src.into(),
            dst,
            srcloc,
        }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovSX_RM_R { ext_mode, src, dst }
    /// A convenience function to be able to use a RegMem as the source of a move.
    pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>, srcloc: Option<SourceLoc>) -> Inst {
        match src {
            RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
            RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc),
        }
    }

    pub(crate) fn mov_r_m(
        size: u8, // 1, 2, 4 or 8
        src: Reg,
        dst: impl Into<SyntheticAmode>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        debug_assert!(src.get_class() == RegClass::I64);

@@ -302,6 +478,7 @@ impl Inst {
            size,
            src,
            dst: dst.into(),
            srcloc,
        }
    }

@@ -345,11 +522,23 @@ impl Inst {
        Inst::Cmp_RMI_R { size, src, dst }
    }

    pub(crate) fn trap(srcloc: SourceLoc, trap_code: TrapCode) -> Inst {
        Inst::Ud2 {
            trap_info: (srcloc, trap_code),
        }
    }

    pub(crate) fn setcc(cc: CC, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Setcc { cc, dst }
    }

    pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2);
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Cmove { size, cc, src, dst }
    }

    pub(crate) fn push64(src: RegMemImm) -> Inst {
        Inst::Push64 { src }
    }

@@ -413,6 +602,14 @@ impl Inst {
    pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
        Inst::JmpUnknown { target }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode, srcloc: SourceLoc) -> Inst {
        Inst::TrapIf {
            cc,
            trap_code,
            srcloc,
        }
    }
}

//=============================================================================

@@ -458,6 +655,7 @@ impl ShowWithRRU for Inst {

        match self {
            Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),

            Inst::Alu_RMI_R {
                is_64,
                op,

@@ -469,13 +667,68 @@ impl ShowWithRRU for Inst {
                src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
            ),
            Inst::XMM_Mov_RM_R { op, src, dst } => format!(

            Inst::UnaryRmR { src, dst, op, size } => format!(
                "{} {}, {}",
                ljustify2(op.to_string(), suffixBWLQ(*size)),
                src.show_rru_sized(mb_rru, *size),
                show_ireg_sized(dst.to_reg(), mb_rru, *size),
            ),

            Inst::Div {
                size,
                signed,
                divisor,
                ..
            } => format!(
                "{} {}",
                ljustify(if *signed {
                    "idiv".to_string()
                } else {
                    "div".into()
                }),
                divisor.show_rru_sized(mb_rru, *size)
            ),
            Inst::MulHi {
                size, signed, rhs, ..
            } => format!(
                "{} {}",
                ljustify(if *signed {
                    "imul".to_string()
                } else {
                    "mul".to_string()
                }),
                rhs.show_rru_sized(mb_rru, *size)
            ),
            Inst::CheckedDivOrRemSeq {
                kind,
                size,
                divisor,
                ..
            } => format!(
                "{} $rax:$rdx, {}",
                match kind {
                    DivOrRemKind::SignedDiv => "sdiv",
                    DivOrRemKind::UnsignedDiv => "udiv",
                    DivOrRemKind::SignedRem => "srem",
                    DivOrRemKind::UnsignedRem => "urem",
                },
                show_ireg_sized(*divisor, mb_rru, *size),
            ),
            Inst::SignExtendRaxRdx { size } => match size {
                2 => "cwd",
                4 => "cdq",
                8 => "cqo",
                _ => unreachable!(),
            }
            .into(),
            Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify(op.to_string()),
                src.show_rru_sized(mb_rru, op.src_size()),
                show_ireg_sized(dst.to_reg(), mb_rru, 8),
            ),
            Inst::XMM_Mov_R_M { op, src, dst } => format!(
            Inst::XMM_Mov_R_M { op, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify(op.to_string()),
                show_ireg_sized(*src, mb_rru, 8),

@@ -514,7 +767,9 @@ impl ShowWithRRU for Inst {
                show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
            ),
            Inst::MovZX_RM_R { ext_mode, src, dst } => {
            Inst::MovZX_RM_R {
                ext_mode, src, dst, ..
            } => {
                if *ext_mode == ExtMode::LQ {
                    format!(
                        "{} {}, {}",

@@ -531,7 +786,7 @@ impl ShowWithRRU for Inst {
                    )
                }
            }
            Inst::Mov64_M_R { src, dst } => format!(
            Inst::Mov64_M_R { src, dst, .. } => format!(
                "{} {}, {}",
                ljustify("movq".to_string()),
                src.show_rru(mb_rru),

@@ -543,13 +798,15 @@ impl ShowWithRRU for Inst {
                addr.show_rru(mb_rru),
                dst.show_rru(mb_rru)
            ),
            Inst::MovSX_RM_R { ext_mode, src, dst } => format!(
            Inst::MovSX_RM_R {
                ext_mode, src, dst, ..
            } => format!(
                "{} {}, {}",
                ljustify2("movs".to_string(), ext_mode.to_string()),
                src.show_rru_sized(mb_rru, ext_mode.src_size()),
                show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
            ),
            Inst::Mov_R_M { size, src, dst } => format!(
            Inst::Mov_R_M { size, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify2("mov".to_string(), suffixBWLQ(*size)),
                show_ireg_sized(*src, mb_rru, *size),

@@ -585,6 +842,12 @@ impl ShowWithRRU for Inst {
                ljustify2("set".to_string(), cc.to_string()),
                show_ireg_sized(dst.to_reg(), mb_rru, 1)
            ),
            Inst::Cmove { size, cc, src, dst } => format!(
                "{} {}, {}",
                ljustify(format!("cmov{}{}", cc.to_string(), suffixBWLQ(*size))),
                src.show_rru_sized(mb_rru, *size),
                show_ireg_sized(dst.to_reg(), mb_rru, *size)
            ),
            Inst::Push64 { src } => {
                format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
            }

@@ -612,12 +875,27 @@ impl ShowWithRRU for Inst {
                taken.show_rru(mb_rru),
                not_taken.show_rru(mb_rru)
            ),
            Inst::JmpTableSeq { idx, .. } => {
                format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
            }
            //
            Inst::JmpUnknown { target } => format!(
                "{} *{}",
                ljustify("jmp".to_string()),
                target.show_rru(mb_rru)
            ),
            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
            }
            Inst::LoadExtName {
                dst, name, offset, ..
            } => format!(
                "{} {}+{}, {}",
                ljustify("movaps".into()),
                name,
                offset,
                show_ireg_sized(dst.to_reg(), mb_rru, 8),
            ),
            Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
            Inst::Hlt => "hlt".into(),
            Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1),

@@ -639,16 +917,36 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
    // sets.
    match inst {
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            src,
            dst,
        } => {
        Inst::Alu_RMI_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
        Inst::XMM_Mov_RM_R { src, dst, .. } => {
        Inst::Div { divisor, .. } => {
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_mod(Writable::from_reg(regs::rdx()));
            divisor.get_regs_as_uses(collector);
        }
        Inst::MulHi { rhs, .. } => {
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_def(Writable::from_reg(regs::rdx()));
            rhs.get_regs_as_uses(collector);
        }
        Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
            // Mark both fixed registers as mods, to avoid an early clobber problem in codegen
            // (i.e. the temporary is allocated one of the fixed registers). This requires writing
            // the rdx register *before* the instruction, which is not too bad.
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_mod(Writable::from_reg(regs::rdx()));
            collector.add_use(*divisor);
            if let Some(tmp) = tmp {
                collector.add_def(*tmp);
            }
        }
        Inst::SignExtendRaxRdx { .. } => {
            collector.add_use(regs::rax());
            collector.add_mod(Writable::from_reg(regs::rdx()));
        }
        Inst::UnaryRmR { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }

@@ -671,7 +969,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
        Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
        Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst)
        }

@@ -683,24 +981,23 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_use(*src);
            dst.get_regs_as_uses(collector);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits,
            dst,
        } => {
        Inst::Shift_R { num_bits, dst, .. } => {
            if num_bits.is_none() {
                collector.add_use(regs::rcx());
            }
            collector.add_mod(*dst);
        }
        Inst::Cmp_RMI_R { size: _, src, dst } => {
        Inst::Cmp_RMI_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_use(*dst); // yes, really `add_use`
        }
        Inst::Setcc { dst, .. } => {
            collector.add_def(*dst);
        }
        Inst::Cmove { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
        Inst::Push64 { src } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(Writable::from_reg(regs::rsp()));

@@ -727,12 +1024,31 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            dest.get_regs_as_uses(collector);
        }

        Inst::JmpTableSeq {
            ref idx,
            ref tmp1,
            ref tmp2,
            ..
        } => {
            collector.add_use(*idx);
            collector.add_def(*tmp1);
            collector.add_def(*tmp2);
        }

        Inst::JmpUnknown { target } => {
            target.get_regs_as_uses(collector);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.add_def(*dst);
        }

        Inst::Ret
        | Inst::EpiloguePlaceholder
        | Inst::JmpKnown { .. }
        | Inst::JmpCond { .. }
        | Inst::Nop { .. }
        | Inst::JmpUnknown { .. }
        | Inst::TrapIf { .. }
        | Inst::VirtualSPOffsetAdj { .. }
        | Inst::Hlt
        | Inst::Ud2 { .. } => {

@@ -768,19 +1084,18 @@ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
impl Amode {
    fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
        match self {
            Amode::ImmReg {
                simm32: _,
                ref mut base,
            } => map_use(map, base),
            Amode::ImmReg { ref mut base, .. } => map_use(map, base),
            Amode::ImmRegRegShift {
                simm32: _,
                ref mut base,
                ref mut index,
                shift: _,
                ..
            } => {
                map_use(map, base);
                map_use(map, index);
            }
            Amode::RipRelative { .. } => {
                // RIP isn't involved in regalloc.
            }
        }
    }
}

@@ -790,7 +1105,7 @@ impl RegMemImm {
        match self {
            RegMemImm::Reg { ref mut reg } => map_use(map, reg),
            RegMemImm::Mem { ref mut addr } => addr.map_uses(map),
            RegMemImm::Imm { simm32: _ } => {}
            RegMemImm::Imm { .. } => {}
        }
    }
}

@@ -799,7 +1114,7 @@ impl RegMem {
    fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
        match self {
            RegMem::Reg { ref mut reg } => map_use(map, reg),
            RegMem::Mem { ref mut addr } => addr.map_uses(map),
            RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
        }
    }
}

@@ -809,18 +1124,31 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
    match inst {
        // ** Nop
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_mod(mapper, dst);
        }
        Inst::Div { divisor, .. } => divisor.map_uses(mapper),
        Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
        Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
            map_use(mapper, divisor);
            if let Some(tmp) = tmp {
                map_def(mapper, tmp)
            }
        }
        Inst::SignExtendRaxRdx { .. } => {}
        Inst::XMM_Mov_RM_R {
            ref mut src,
            ref mut dst,
            ..
        }
        | Inst::UnaryRmR {
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_def(mapper, dst);

@@ -841,15 +1169,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_use(mapper, src);
            dst.map_uses(mapper);
        }
        Inst::Imm_R {
            dst_is_64: _,
            simm64: _,
            ref mut dst,
        } => map_def(mapper, dst),
        Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
        Inst::Mov_R_R {
            is_64: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            map_use(mapper, src);
            map_def(mapper, dst);

@@ -862,7 +1186,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            src.map_uses(mapper);
            map_def(mapper, dst);
        }
        Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
        Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
            src.map_uses(mapper);
            map_def(mapper, dst);
        }

@@ -882,23 +1206,26 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_use(mapper, src);
            dst.map_uses(mapper);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits: _,
            ref mut dst,
        } => {
        Inst::Shift_R { ref mut dst, .. } => {
            map_mod(mapper, dst);
        }
        Inst::Cmp_RMI_R {
            size: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_use(mapper, dst);
        }
        Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
        Inst::Cmove {
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_mod(mapper, dst)
        }
        Inst::Push64 { ref mut src } => src.map_uses(mapper),
        Inst::Pop64 { ref mut dst } => {
            map_def(mapper, dst);

@@ -932,12 +1259,27 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            dest.map_uses(mapper);
        }

        Inst::JmpTableSeq {
            ref mut idx,
            ref mut tmp1,
            ref mut tmp2,
            ..
        } => {
            map_use(mapper, idx);
            map_def(mapper, tmp1);
            map_def(mapper, tmp2);
        }

        Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),

        Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),

        Inst::Ret
        | Inst::EpiloguePlaceholder
        | Inst::JmpKnown { .. }
        | Inst::JmpCond { .. }
        | Inst::Nop { .. }
        | Inst::JmpUnknown { .. }
        | Inst::TrapIf { .. }
        | Inst::VirtualSPOffsetAdj { .. }
        | Inst::Ud2 { .. }
        | Inst::Hlt => {

@@ -964,8 +1306,10 @@ impl MachInst for Inst {
        // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
        // %reg.
        match self {
            Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
            Self::XMM_Mov_RM_R { op, src, dst }
            Self::Mov_R_R {
                is_64, src, dst, ..
            } if *is_64 => Some((*dst, *src)),
            Self::XMM_Mov_RM_R { op, src, dst, .. }
                if *op == SseOpcode::Movss
                    || *op == SseOpcode::Movsd
                    || *op == SseOpcode::Movaps =>

@@ -994,10 +1338,12 @@ impl MachInst for Inst {
            &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
            &Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()),
            &Self::JmpCond {
                cc: _,
                taken,
                not_taken,
                taken, not_taken, ..
            } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
            &Self::JmpTableSeq {
                ref targets_for_term,
                ..
            } => MachTerminator::Indirect(&targets_for_term[..]),
            // All other cases are boring.
            _ => MachTerminator::None,
        }

@@ -1011,8 +1357,8 @@ impl MachInst for Inst {
        match rc_dst {
            RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
            RegClass::V128 => match ty {
                F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg),
                F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg),
                F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
                F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
                _ => panic!("unexpected V128 type in gen_move"),
            },
            _ => panic!("gen_move(x64): unhandled regclass"),

@@ -1035,6 +1381,7 @@ impl MachInst for Inst {
        match ty {
            I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
            F32 | F64 | I128 | B128 => Ok(RegClass::V128),
            IFLAGS | FFLAGS => Ok(RegClass::I64),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {}",
                ty

@@ -1046,10 +1393,11 @@ impl MachInst for Inst {
        Inst::jmp_known(BranchTarget::Label(label))
    }

    fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
    fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]> {
        let mut ret = SmallVec::new();
        let is64 = value > 0xffff_ffff;
        ret.push(Inst::imm_r(is64, value, to_reg));
        debug_assert!(ty.is_int(), "float constants NYI");
        let is_64 = ty == I64 && value > 0x7fffffff;
        ret.push(Inst::imm_r(is_64, value, to_reg));
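        // imm_r (above) asserts that a 32-bit immediate sign-extends to the
        // intended 64-bit value, so I64 constants above 0x7fffffff take the
        // 64-bit (movabsq) form here.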
        ret
    }

@@ -1061,6 +1409,10 @@ impl MachInst for Inst {
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::I64
    }

    type LabelUse = LabelUse;
}

@@ -1076,6 +1428,18 @@ impl MachInstEmit for Inst {
    fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut Self::State) {
        emit::emit(self, sink, flags, state);
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
        self.show_rru(mb_rru)
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(_: &dyn ABIBody<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
        }
    }
}

/// A label-use (internal relocation) in generated code.

@@ -1085,6 +1449,10 @@ pub enum LabelUse {
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {

@@ -1092,19 +1460,19 @@ impl MachInstLabelUse for LabelUse {

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0x7fff_ffff,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0x8000_0000,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 4,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

@@ -1119,24 +1487,29 @@ impl MachInstLabelUse for LabelUse {
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
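            // For PCRel32, pc_rel is (label_offset - use_offset), and the addend
            // was stored in the buffer by the emitter; e.g. a jump-table word whose
            // addend is (word_off - jt_off) patches to (target - jt_off).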
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 => false,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 => {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JumpRel32 label-use.");
            }
        }

@@ -33,46 +33,55 @@ fn gpr(enc: u8, index: u8) -> Reg {
}

pub(crate) fn r12() -> Reg {
    gpr(ENC_R12, 0)
    gpr(ENC_R12, 16)
}
pub(crate) fn r13() -> Reg {
    gpr(ENC_R13, 1)
    gpr(ENC_R13, 17)
}
pub(crate) fn r14() -> Reg {
    gpr(ENC_R14, 2)
}
pub(crate) fn r15() -> Reg {
    gpr(ENC_R15, 3)
    gpr(ENC_R14, 18)
}
pub(crate) fn rbx() -> Reg {
    gpr(ENC_RBX, 4)
    gpr(ENC_RBX, 19)
}
pub(crate) fn rsi() -> Reg {
    gpr(6, 5)
    gpr(6, 20)
}
pub(crate) fn rdi() -> Reg {
    gpr(7, 6)
    gpr(7, 21)
}
pub(crate) fn rax() -> Reg {
    gpr(0, 7)
    gpr(0, 22)
}
pub(crate) fn rcx() -> Reg {
    gpr(1, 8)
    gpr(1, 23)
}
pub(crate) fn rdx() -> Reg {
    gpr(2, 9)
    gpr(2, 24)
}
pub(crate) fn r8() -> Reg {
    gpr(8, 10)
    gpr(8, 25)
}
pub(crate) fn r9() -> Reg {
    gpr(9, 11)
    gpr(9, 26)
}
pub(crate) fn r10() -> Reg {
    gpr(10, 12)
    gpr(10, 27)
}
pub(crate) fn r11() -> Reg {
    gpr(11, 13)
    gpr(11, 28)
}

pub(crate) fn r15() -> Reg {
    // r15 is put aside since this is the pinned register.
    gpr(ENC_R15, 29)
}

/// The pinned register on this architecture.
/// It must be the same as Spidermonkey's HeapReg, as found in this file.
/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
pub(crate) fn pinned_reg() -> Reg {
    r15()
}

fn fpr(enc: u8, index: u8) -> Reg {

@@ -80,52 +89,52 @@ fn fpr(enc: u8, index: u8) -> Reg {
}

pub(crate) fn xmm0() -> Reg {
    fpr(0, 14)
    fpr(0, 0)
}
pub(crate) fn xmm1() -> Reg {
    fpr(1, 15)
    fpr(1, 1)
}
pub(crate) fn xmm2() -> Reg {
    fpr(2, 16)
    fpr(2, 2)
}
pub(crate) fn xmm3() -> Reg {
    fpr(3, 17)
    fpr(3, 3)
}
pub(crate) fn xmm4() -> Reg {
    fpr(4, 18)
    fpr(4, 4)
}
pub(crate) fn xmm5() -> Reg {
    fpr(5, 19)
    fpr(5, 5)
}
pub(crate) fn xmm6() -> Reg {
    fpr(6, 20)
    fpr(6, 6)
}
pub(crate) fn xmm7() -> Reg {
    fpr(7, 21)
    fpr(7, 7)
}
pub(crate) fn xmm8() -> Reg {
    fpr(8, 22)
    fpr(8, 8)
}
pub(crate) fn xmm9() -> Reg {
    fpr(9, 23)
    fpr(9, 9)
}
pub(crate) fn xmm10() -> Reg {
    fpr(10, 24)
    fpr(10, 10)
}
pub(crate) fn xmm11() -> Reg {
    fpr(11, 25)
    fpr(11, 11)
}
pub(crate) fn xmm12() -> Reg {
    fpr(12, 26)
    fpr(12, 12)
}
pub(crate) fn xmm13() -> Reg {
    fpr(13, 27)
    fpr(13, 13)
}
pub(crate) fn xmm14() -> Reg {
    fpr(14, 28)
    fpr(14, 14)
}
pub(crate) fn xmm15() -> Reg {
    fpr(15, 29)
    fpr(15, 15)
}

pub(crate) fn rsp() -> Reg {

@@ -139,39 +148,14 @@ pub(crate) fn rbp() -> Reg {
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = Vec::<(RealReg, String)>::new();
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Integer regs.
    let mut base = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));
    regs.push((r15().to_real_reg(), "%r15".into()));
    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(r12().get_index()),
    });
    let use_pinned_reg = flags.enable_pinned_reg();

    // XMM registers
    base = regs.len();
    let first_fpr = regs.len();
    regs.push((xmm0().to_real_reg(), "%xmm0".into()));
    regs.push((xmm1().to_real_reg(), "%xmm1".into()));
    regs.push((xmm2().to_real_reg(), "%xmm2".into()));

@@ -188,17 +172,61 @@ pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
    regs.push((xmm13().to_real_reg(), "%xmm13".into()));
    regs.push((xmm14().to_real_reg(), "%xmm14".into()));
    regs.push((xmm15().to_real_reg(), "%xmm15".into()));
    let last_fpr = regs.len() - 1;

    // Integer regs.
    let first_gpr = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));

    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    // Other regs, not available to the allocator.
    debug_assert_eq!(r15(), pinned_reg());
    let allocable = if use_pinned_reg {
        // The pinned register is not allocatable in this case, so record the length before adding
        // it.
        let len = regs.len();
        regs.push((r15().to_real_reg(), "%r15/pinned".into()));
        len
    } else {
        regs.push((r15().to_real_reg(), "%r15".into()));
        regs.len()
    };
    let last_gpr = allocable - 1;

    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: first_gpr,
        last: last_gpr,
        suggested_scratch: Some(r12().get_index()),
    });
    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        first: first_fpr,
        last: last_fpr,
        suggested_scratch: Some(xmm15().get_index()),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));
    // Sanity-check: the index passed to the Reg ctor must match the order in the register list.
    for (i, reg) in regs.iter().enumerate() {
        assert_eq!(i, reg.0.get_index());
    }
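    // With the numbering above, xmm0..xmm15 get indices 0..15 and the GPRs start
    // at 16 (r12 = 16, ..., r15 = 29), so iteration order and ctor indices line up.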

    RealRegUniverse {
        regs,
File diff suppressed because it is too large

@@ -11,28 +11,33 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::pretty_print::ShowWithRRU;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self, Flags};
use crate::settings::{self as shared_settings, Flags};

use crate::isa::x64::inst::regs::create_reg_universe_systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};

use super::TargetIsa;

mod abi;
mod inst;
mod lower;
mod settings;

/// An X64 backend.
pub(crate) struct X64Backend {
    triple: Triple,
    flags: Flags,
    _x64_flags: x64_settings::Flags,
    reg_universe: RealRegUniverse,
}

impl X64Backend {
    /// Create a new X64 backend with the given (shared) flags.
    fn new_with_flags(triple: Triple, flags: Flags) -> Self {
    fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
        let reg_universe = create_reg_universe_systemv(&flags);
        Self {
            triple,
            flags,
            _x64_flags: x64_flags,
            reg_universe,
        }
    }

@@ -103,10 +108,17 @@ impl MachBackend for X64Backend {
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
    IsaBuilder {
        triple,
        setup: settings::builder(),
        constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
            let backend = X64Backend::new_with_flags(triple, flags);
            Box::new(TargetIsaAdapter::new(backend))
        },
        setup: x64_settings::builder(),
        constructor: isa_constructor,
    }
}

fn isa_constructor(
    triple: Triple,
    shared_flags: Flags,
    builder: shared_settings::Builder,
) -> Box<dyn TargetIsa> {
    let isa_flags = x64_settings::Flags::new(&shared_flags, builder);
    let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
    Box::new(TargetIsaAdapter::new(backend))
}

@@ -0,0 +1,9 @@
//! x86 Settings.

use crate::settings::{self, detail, Builder};
use core::fmt;

// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));

@@ -1071,8 +1071,7 @@ pub fn create_unwind_info(
            .map(|u| UnwindInfo::SystemV(u))
        }
        CallConv::WindowsFastcall => {
            super::unwind::winx64::create_unwind_info(func, isa, Some(RU::rbp.into()))?
                .map(|u| UnwindInfo::WindowsX64(u))
            super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
        }
        _ => None,
    })

@@ -1313,6 +1313,79 @@ fn expand_fcvt_to_uint_sat(
    cfg.recompute_block(pos.func, done);
}

// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4.
static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
    0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f,
];
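// Each little-endian lane is 0x4f000000, i.e. 2147483648.0f32 (2^31): the value
// that i32::MAX (2147483647) rounds to when converted to f32.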
|
||||
|
||||
/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes
|
||||
/// using a long sequence of NaN quieting and truncation. This logic is separate from
|
||||
/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are
|
||||
/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into
|
||||
/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
|
||||
fn expand_fcvt_to_uint_sat_vector(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Unary {
|
||||
opcode: ir::Opcode::FcvtToUintSat,
|
||||
arg,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
|
||||
if controlling_type == I32X4 {
|
||||
debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
|
||||
// We must both quiet any NaNs--setting that lane to 0--and saturate any
|
||||
// lanes that might overflow during conversion to the highest/lowest integer
|
||||
// allowed in that lane.
|
||||
let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into());
|
||||
let max_signed_constant = pos
|
||||
.func
|
||||
.dfg
|
||||
.constants
|
||||
.insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
|
||||
let zeroes = pos.ins().vconst(F32X4, zeroes_constant);
|
||||
let max_signed = pos.ins().vconst(F32X4, max_signed_constant);
|
||||
// Clamp the input to 0 for negative floating point numbers. TODO we need to
|
||||
// convert NaNs to 0 but this doesn't do that?
|
||||
let ge_zero = pos.ins().x86_fmax(arg, zeroes);
|
||||
// Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert.
|
||||
// For floating point numbers above this, CVTTPS2DQ returns the undefined value
|
||||
// 0x80000000.
|
||||
let minus_max_signed = pos.ins().fsub(ge_zero, max_signed);
|
||||
let le_max_signed =
|
||||
pos.ins()
|
||||
.fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed);
|
||||
// Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0.
|
||||
// These lanes have the MSB set to 1 after the XOR. We are trying to calculate a
|
||||
// valid, in-range addend.
|
||||
let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed);
|
||||
let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed);
|
||||
let difference = pos
|
||||
.ins()
|
||||
.bxor(minus_max_signed_as_int, le_max_signed_as_int);
|
||||
// Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified
|
||||
// previously (MSB set to 1).
|
||||
let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes);
|
||||
let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int);
|
||||
// Convert the original clamped number to an integer and add back in the addend
|
||||
// (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these).
|
||||
let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero);
|
||||
pos.func.dfg.replace(inst).iadd(converted, addend);
|
||||
} else {
|
||||
unreachable!(
|
||||
"{} should not be legalized in expand_fcvt_to_uint_sat_vector",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
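
For intuition, the per-lane operation this legalization targets is the saturating float-to-unsigned conversion (Wasm's i32.trunc_sat_f32_u). A scalar model of the intended semantics in plain Rust — illustrative only; the lowering above operates on whole vectors with x86-specific instructions, and its NaN handling is still under the TODO noted in the comments:

// Scalar model of the intended per-lane semantics of fcvt_to_uint_sat on
// f32 -> u32: NaN lanes become 0, negative lanes clamp to 0, and lanes at
// or above 2^32 saturate to u32::MAX. Note that the bytes 00 00 00 4f
// (little-endian) in MAX_SIGNED_I32X4S_AS_F32X4S above are the f32 value
// 2147483648.0, i.e. 2^31.
fn fcvt_to_uint_sat32(x: f32) -> u32 {
    if x.is_nan() || x <= 0.0 {
        0
    } else if x >= 4294967296.0 {
        // at or above 2^32
        u32::MAX
    } else {
        x as u32 // in-range truncation toward zero
    }
}

fn main() {
    assert_eq!(fcvt_to_uint_sat32(f32::NAN), 0);
    assert_eq!(fcvt_to_uint_sat32(-1.5), 0);
    assert_eq!(fcvt_to_uint_sat32(3.9), 3);
    assert_eq!(fcvt_to_uint_sat32(1e20), u32::MAX);
    // 2^31 is exactly representable and in range:
    assert_eq!(fcvt_to_uint_sat32(2147483648.0), 2_147_483_648);
}
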
/// Convert shuffle instructions.
fn convert_shuffle(
    inst: ir::Inst,

@@ -57,20 +57,12 @@ fn isa_constructor(
    let isa_flags = settings::Flags::new(&shared_flags, builder);

    if isa_flags.use_new_backend() {
        #[cfg(not(feature = "x64"))]
        panic!("new backend x86 support not included by cargo features!");

        #[cfg(feature = "x64")]
        super::x64::isa_builder(triple).finish(shared_flags)
    } else {
        Box::new(Isa {
            triple,
            isa_flags,
            shared_flags,
            cpumode: level1,
        })
    }
    Box::new(Isa {
        triple,
        isa_flags,
        shared_flags,
        cpumode: level1,
    })
}

impl TargetIsa for Isa {

@@ -13,7 +13,6 @@ use log::warn;
pub(crate) fn create_unwind_info(
    func: &Function,
    isa: &dyn TargetIsa,
    frame_register: Option<RegUnit>,
) -> CodegenResult<Option<UnwindInfo>> {
    // Only Windows fastcall is supported for unwind information
    if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {

@@ -28,7 +27,6 @@ pub(crate) fn create_unwind_info(
    let mut prologue_size = 0;
    let mut unwind_codes = Vec::new();
    let mut found_end = false;
    let mut xmm_save_count: u8 = 0;

    for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
        // x64 ABI prologues cannot exceed 255 bytes in length

@@ -64,16 +62,6 @@ pub(crate) fn create_unwind_info(
                    _ => {}
                }
            }
            InstructionData::CopySpecial { src, dst, .. } => {
                if let Some(frame_register) = frame_register {
                    if src == (RU::rsp as RegUnit) && dst == frame_register {
                        unwind_codes.push(UnwindCode::SetFramePointer {
                            offset: unwind_offset,
                            sp_offset: 0,
                        });
                    }
                }
            }
            InstructionData::UnaryImm { opcode, imm } => {
                match opcode {
                    Opcode::Iconst => {

@@ -112,7 +100,6 @@ pub(crate) fn create_unwind_info(
                {
                    // If this is a save of an FPR, record an unwind operation
                    // Note: the stack_offset here is relative to an adjusted SP
                    // This will be fixed up later to be based on the frame pointer offset
                    if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
                        let offset: i32 = offset.into();
                        unwind_codes.push(UnwindCode::SaveXmm {

@@ -120,8 +107,6 @@ pub(crate) fn create_unwind_info(
                            reg: src as u8,
                            stack_offset: offset as u32,
                        });

                        xmm_save_count += 1;
                    }
                }
            }

@@ -136,45 +121,11 @@ pub(crate) fn create_unwind_info(

    assert!(found_end);

    // When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
    // register minus some offset, forming a "base address". This attempts to calculate the frame register offset
    // while updating the XMM save offsets to be relative from this "base address" rather than RSP.
    let mut frame_register_offset = 0;
    if frame_register.is_some() && xmm_save_count > 0 {
        // Determine the number of 16-byte slots used for all CSRs (including GPRs)
        // The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
        // Assumption: each FPR is stored at a lower address than the previous one
        let mut last_stack_offset = None;
        let mut fpr_save_count: u8 = 0;
        let mut gpr_push_count: u8 = 0;
        for code in unwind_codes.iter_mut() {
            match code {
                UnwindCode::SaveXmm { stack_offset, .. } => {
                    if let Some(last) = last_stack_offset {
                        assert!(last > *stack_offset);
                    }
                    last_stack_offset = Some(*stack_offset);
                    fpr_save_count += 1;
                    *stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
                }
                UnwindCode::PushRegister { .. } => {
                    gpr_push_count += 1;
                }
                _ => {}
            }
        }
        assert_eq!(fpr_save_count, xmm_save_count);

        // Account for alignment space when there's an odd number of GPR pushes
        // Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
        frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
    }

    Ok(Some(UnwindInfo {
        flags: 0, // this assumes cranelift functions have no SEH handlers
        prologue_size: prologue_size as u8,
        frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
        frame_register_offset,
        frame_register: None,
        frame_register_offset: 0,
        unwind_codes,
    }))
}

@@ -201,7 +152,7 @@ mod tests {
        context.compile(&*isa).expect("expected compilation");

        assert_eq!(
            create_unwind_info(&context.func, &*isa, None).expect("can create unwind info"),
            create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
            None
        );
    }

@@ -219,7 +170,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -228,17 +179,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 9,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 9,
                        size: 64

@@ -247,9 +194,9 @@ mod tests {
            }
        );

        assert_eq!(unwind.emit_size(), 12);
        assert_eq!(unwind.emit_size(), 8);

        let mut buf = [0u8; 12];
        let mut buf = [0u8; 8];
        unwind.emit(&mut buf);

        assert_eq!(

@@ -257,16 +204,12 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x09, // Prologue size
                0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x02, // Unwind code count (1 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x09, // Prolog offset
                0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

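The hex bytes checked in these tests follow the Windows x64 UNWIND_CODE layout: each code is a prolog-offset byte followed by an operation byte whose low nibble selects the operation and whose high nibble carries the operation info. A small decoding sketch (the layout is from the Windows x64 unwind documentation; the function name is illustrative):

// Decode one Windows x64 unwind-code operation byte, e.g. the 0x72 above:
// low nibble = operation (2 = UWOP_ALLOC_SMALL), high nibble = op info.
// For UWOP_ALLOC_SMALL the allocation size is info * 8 + 8 bytes.
fn decode_op(byte: u8) -> (u8, u8) {
    (byte & 0x0f, byte >> 4)
}

fn main() {
    let (op, info) = decode_op(0x72);
    assert_eq!(op, 2); // small stack alloc
    assert_eq!(info as u32 * 8 + 8, 64); // the 64-byte alloc in the test
    let (op, info) = decode_op(0x50);
    assert_eq!(op, 0); // push nonvolatile register
    assert_eq!(info, 5); // register number 5 = RBP
}
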
@@ -284,7 +227,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -293,17 +236,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 27,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 27,
                        size: 10000

@@ -322,16 +261,16 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x03, // Unwind code count (2 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x1B, // Prolog offset
                0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
                0xE2, // Low size byte
                0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding
                0x00, // Padding
            ]
        );
    }

@@ -349,7 +288,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -358,17 +297,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 27,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 27,
                        size: 1000000

@@ -377,9 +312,9 @@ mod tests {
            }
        );

        assert_eq!(unwind.emit_size(), 16);
        assert_eq!(unwind.emit_size(), 12);

        let mut buf = [0u8; 16];
        let mut buf = [0u8; 12];
        unwind.emit(&mut buf);

        assert_eq!(

@@ -387,20 +322,16 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x04, // Unwind code count (3 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x1B, // Prolog offset
                0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
                0x40, // Byte 1 of size
                0x42, // Byte 2 of size
                0x0F, // Byte 3 of size
                0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

@@ -19,10 +19,24 @@ use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::{I32, I64};
use crate::ir::{self, InstBuilder, MemFlags};
use crate::isa::TargetIsa;

#[cfg(any(
    feature = "x86",
    feature = "arm32",
    feature = "arm64",
    feature = "riscv"
))]
use crate::predicates;
#[cfg(any(
    feature = "x86",
    feature = "arm32",
    feature = "arm64",
    feature = "riscv"
))]
use alloc::vec::Vec;

use crate::timing;
use alloc::collections::BTreeSet;
use alloc::vec::Vec;

mod boundary;
mod call;

@@ -1,5 +1,6 @@
//! ABI definitions.

use crate::binemit::Stackmap;
use crate::ir::{ArgumentExtension, StackSlot};
use crate::machinst::*;
use crate::settings;

@@ -100,6 +101,15 @@ pub trait ABIBody {
    /// Store to a spillslot.
    fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;

    /// Generate a stackmap, given a list of spillslots and the emission state
    /// at a given program point (prior to emission of the safepointing
    /// instruction).
    fn spillslots_to_stackmap(
        &self,
        slots: &[SpillSlot],
        state: &<Self::I as MachInstEmit>::State,
    ) -> Stackmap;

    /// Generate a prologue, post-regalloc. This should include any stack
    /// frame or other setup necessary to use the other methods (`load_arg`,
    /// `store_retval`, and spillslot accesses.) `self` is mutable so that we

@@ -113,21 +123,34 @@ pub trait ABIBody {
    /// likely closely related.
    fn gen_epilogue(&self) -> Vec<Self::I>;

    /// Returns the full frame size for the given function, after prologue emission has run. This
    /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
    /// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
    /// This is used for ABI variants where the client generates prologue/epilogue code, as in
    /// Baldrdash (SpiderMonkey integration).
    /// Returns the full frame size for the given function, after prologue
    /// emission has run. This comprises the spill slots and stack-storage slots
    /// (but not storage for clobbered callee-save registers, arguments pushed
    /// at callsites within this function, or other ephemeral pushes). This is
    /// used for ABI variants where the client generates prologue/epilogue code,
    /// as in Baldrdash (SpiderMonkey integration).
    fn frame_size(&self) -> u32;

    /// Returns the size of arguments expected on the stack.
    fn stack_args_size(&self) -> u32;

    /// Get the spill-slot size.
    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;

    /// Generate a spill.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I;
    /// Generate a spill. The type, if known, is given; this can be used to
    /// generate a store instruction optimized for the particular type rather
    /// than the RegClass (e.g., only F64 that resides in a V128 register). If
    /// no type is given, the implementation should spill the whole register.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;

    /// Generate a reload (fill). As for spills, the type may be given to allow
    /// a more optimized load instruction to be generated.
    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Self::I;
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        ty: Option<Type>,
    ) -> Self::I;
}
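
The motivation for the new Option<Type> parameter: when the spilled value's type is known, a backend can emit a narrower store (e.g., an 8-byte store for an F64 living in a 128-bit vector register); when it is not, it must conservatively save the whole register. A toy illustration of that dispatch — not the actual ABI implementation, and the type names are stand-ins:

// Toy model of choosing a spill-store width from an optional type, for a
// value held in a 128-bit vector register.
enum Ty {
    F64,
    I8X16,
}

fn spill_store_bytes(ty: Option<Ty>) -> u32 {
    match ty {
        Some(Ty::F64) => 8,   // narrower store suffices
        Some(Ty::I8X16) => 16,
        None => 16,           // unknown: spill the whole register
    }
}

fn main() {
    assert_eq!(spill_store_bytes(Some(Ty::F64)), 8);
    assert_eq!(spill_store_bytes(None), 16);
}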

/// Trait implemented by an object that tracks ABI-related state and can

@@ -140,7 +140,7 @@
//! Given these invariants, we argue why each optimization preserves execution
//! semantics below (grep for "Preserves execution semantics").

use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, Stackmap};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst};

@@ -168,6 +168,8 @@ pub struct MachBuffer<I: VCodeInst> {
    call_sites: SmallVec<[MachCallSite; 16]>,
    /// Any source location mappings referring to this code.
    srclocs: SmallVec<[MachSrcLoc; 64]>,
    /// Any stackmaps referring to this code.
    stackmaps: SmallVec<[MachStackMap; 8]>,
    /// The current source location in progress (after `start_srcloc()` and
    /// before `end_srcloc()`). This is a (start_offset, src_loc) tuple.
    cur_srcloc: Option<(CodeOffset, SourceLoc)>,

@@ -228,6 +230,8 @@ pub struct MachBufferFinalized {
    call_sites: SmallVec<[MachCallSite; 16]>,
    /// Any source location mappings referring to this code.
    srclocs: SmallVec<[MachSrcLoc; 64]>,
    /// Any stackmaps referring to this code.
    stackmaps: SmallVec<[MachStackMap; 8]>,
}

static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff;

@@ -262,6 +266,7 @@ impl<I: VCodeInst> MachBuffer<I> {
            traps: SmallVec::new(),
            call_sites: SmallVec::new(),
            srclocs: SmallVec::new(),
            stackmaps: SmallVec::new(),
            cur_srcloc: None,
            label_offsets: SmallVec::new(),
            label_aliases: SmallVec::new(),

@@ -1090,6 +1095,7 @@ impl<I: VCodeInst> MachBuffer<I> {
            traps: self.traps,
            call_sites: self.call_sites,
            srclocs: self.srclocs,
            stackmaps: self.stackmaps,
        }
    }

@@ -1149,6 +1155,22 @@ impl<I: VCodeInst> MachBuffer<I> {
            self.srclocs.push(MachSrcLoc { start, end, loc });
        }
    }

    /// Add stackmap metadata for this program point: a set of stack offsets
    /// (from SP upward) that contain live references.
    ///
    /// The `offset_to_fp` value is the offset between the nominal SP (at which
    /// the `stack_offsets` are based) and the FP value. By subtracting
    /// `offset_to_fp` from each `stack_offsets` element, one can obtain
    /// live-reference offsets from FP instead.
    pub fn add_stackmap(&mut self, insn_len: CodeOffset, stackmap: Stackmap) {
        let offset = self.cur_offset();
        self.stackmaps.push(MachStackMap {
            offset,
            offset_end: offset + insn_len,
            stackmap,
        });
    }
}

impl MachBufferFinalized {
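
A worked example of the conversion the add_stackmap comment describes: with SP-relative live-reference offsets [8, 24] and the FP sitting 32 bytes above the nominal SP, the FP-relative offsets come out as [-24, -8]. A plain-arithmetic sketch (the real Stackmap stores a bitmap of stack words, not a list of offsets):

// Convert SP-relative stack offsets of live references to FP-relative
// offsets by subtracting the SP-to-FP distance.
fn to_fp_relative(stack_offsets: &[i64], offset_to_fp: i64) -> Vec<i64> {
    stack_offsets.iter().map(|&off| off - offset_to_fp).collect()
}

fn main() {
    assert_eq!(to_fp_relative(&[8, 24], 32), vec![-24, -8]);
}
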
@@ -1207,6 +1229,11 @@ impl MachBufferFinalized {
        sink.begin_rodata();
        sink.end_codegen();
    }

    /// Get the stackmap metadata for this code.
    pub fn stackmaps(&self) -> &[MachStackMap] {
        &self.stackmaps[..]
    }
}

/// A constant that is deferred to the next constant-pool opportunity.

@@ -1286,6 +1313,19 @@ pub struct MachSrcLoc {
    pub loc: SourceLoc,
}

/// Record of stackmap metadata: stack offsets containing references.
#[derive(Clone, Debug)]
pub struct MachStackMap {
    /// The code offset at which this stackmap applies.
    pub offset: CodeOffset,
    /// The code offset just past the "end" of the instruction: that is, the
    /// offset of the first byte of the following instruction, or equivalently,
    /// the start offset plus the instruction length.
    pub offset_end: CodeOffset,
    /// The Stackmap itself.
    pub stackmap: Stackmap,
}

/// Record of branch instruction in the buffer, to facilitate editing.
#[derive(Clone, Debug)]
struct MachBranch {

@@ -1390,7 +1430,9 @@ mod test {
        inst.emit(&mut buf, &flags, &mut state);

        buf.bind_label(label(1));
        let inst = Inst::Nop4;
        let inst = Inst::Udf {
            trap_info: (SourceLoc::default(), TrapCode::Interrupt),
        };
        inst.emit(&mut buf, &flags, &mut state);

        buf.bind_label(label(2));

@@ -1403,14 +1445,13 @@ mod test {

        let mut buf2 = MachBuffer::new();
        let mut state = Default::default();
        let inst = Inst::OneWayCondBr {
            kind: CondBrKind::Zero(xreg(0)),
            target: BranchTarget::ResolvedOffset(8),
        let inst = Inst::TrapIf {
            kind: CondBrKind::NotZero(xreg(0)),
            trap_info: (SourceLoc::default(), TrapCode::Interrupt),
        };
        inst.emit(&mut buf2, &flags, &mut state);
        let inst = Inst::Nop4;
        inst.emit(&mut buf2, &flags, &mut state);
        inst.emit(&mut buf2, &flags, &mut state);

        let buf2 = buf2.finish();

@@ -23,7 +23,7 @@ where
    // Build the lowering context.
    let lower = Lower::new(f, abi, block_order)?;
    // Lower the IR.
    let mut vcode = lower.lower(b)?;
    let (mut vcode, stackmap_request_info) = lower.lower(b)?;

    debug!(
        "vcode from lowering: \n{}",

@@ -57,11 +57,23 @@ where
        }
    }

    // If there are no reference-typed values, or there are but no safepoints
    // at which we need to know about them, then we don't need stackmaps.
    let sri = if stackmap_request_info.reftyped_vregs.len() > 0
        && stackmap_request_info.safepoint_insns.len() > 0
    {
        Some(&stackmap_request_info)
    } else {
        None
    };

    let result = {
        let _tt = timing::regalloc();
        allocate_registers_with_opts(
            &mut vcode,
            b.reg_universe(),
            sri,
            Options {
                run_checker,
                algorithm,

@@ -17,7 +17,7 @@ use crate::machinst::{
};
use crate::CodegenResult;

use regalloc::{Reg, RegClass, VirtualReg, Writable};
use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable};

use alloc::boxed::Box;
use alloc::vec::Vec;

@@ -146,6 +146,8 @@ pub trait LowerCtx {
    fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: Self::I);
    /// Emit a machine instruction that is a safepoint.
    fn emit_safepoint(&mut self, mach_inst: Self::I);
    /// Indicate that the given input uses the register returned by
    /// `get_input()`. Codegen may not happen otherwise for the producing
    /// instruction if it has no side effects and no uses.

@@ -206,6 +208,14 @@ pub trait LowerBackend {
    }
}

/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
    loc: SourceLoc,
    is_safepoint: bool,
    inst: I,
}

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {

@@ -237,17 +247,17 @@ pub struct Lower<'func, I: VCodeInst> {
    next_vreg: u32,

    /// Insts in reverse block order, before final copy to vcode.
    block_insts: Vec<(SourceLoc, I)>,
    block_insts: Vec<InstTuple<I>>,

    /// Ranges in `block_insts` constituting BBs.
    block_ranges: Vec<(usize, usize)>,

    /// Instructions collected for the BB in progress, in reverse order, with
    /// source-locs attached.
    bb_insts: Vec<(SourceLoc, I)>,
    bb_insts: Vec<InstTuple<I>>,

    /// Instructions collected for the CLIF inst in progress, in forward order.
    ir_insts: Vec<I>,
    ir_insts: Vec<InstTuple<I>>,

    /// The register to use for GetPinnedReg, if any, on this architecture.
    pinned_reg: Option<Reg>,

@@ -276,6 +286,7 @@ fn alloc_vreg(
        let v = *next_vreg;
        *next_vreg += 1;
        value_regs[value] = Reg::new_virtual(regclass, v);
        debug!("value {} gets vreg {:?}", value, v);
    }
    value_regs[value].as_virtual_reg().unwrap()
}

@@ -579,15 +590,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    fn finish_ir_inst(&mut self, loc: SourceLoc) {
        for inst in self.ir_insts.drain(..).rev() {
            self.bb_insts.push((loc, inst));
        // `bb_insts` is kept in reverse order, so emit the instructions in
        // reverse order.
        for mut tuple in self.ir_insts.drain(..).rev() {
            tuple.loc = loc;
            self.bb_insts.push(tuple);
        }
    }

    fn finish_bb(&mut self) {
        let start = self.block_insts.len();
        for pair in self.bb_insts.drain(..).rev() {
            self.block_insts.push(pair);
        for tuple in self.bb_insts.drain(..).rev() {
            self.block_insts.push(tuple);
        }
        let end = self.block_insts.len();
        self.block_ranges.push((start, end));

@@ -595,9 +609,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> {

    fn copy_bbs_to_vcode(&mut self) {
        for &(start, end) in self.block_ranges.iter().rev() {
            for &(loc, ref inst) in &self.block_insts[start..end] {
            for &InstTuple {
                loc,
                is_safepoint,
                ref inst,
            } in &self.block_insts[start..end]
            {
                self.vcode.set_srcloc(loc);
                self.vcode.push(inst.clone());
                self.vcode.push(inst.clone(), is_safepoint);
            }
            self.vcode.end_bb();
        }

@@ -645,7 +664,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
    pub fn lower<B: LowerBackend<MInst = I>>(
        mut self,
        backend: &B,
    ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
        debug!("about to lower function: {:?}", self.f);

        // Initialize the ABI object, giving it a temp if requested.

@@ -730,10 +752,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        self.copy_bbs_to_vcode();

        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
        let vcode = self.vcode.build();
        let (vcode, stackmap_info) = self.vcode.build();
        debug!("built vcode: {:?}", vcode);

        Ok(vcode)
        Ok((vcode, stackmap_info))
    }

    /// Get the actual inputs for a value. This is the implementation for

@@ -916,7 +938,19 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
    }

    fn emit(&mut self, mach_inst: I) {
        self.ir_insts.push(mach_inst);
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: false,
            inst: mach_inst,
        });
    }

    fn emit_safepoint(&mut self, mach_inst: I) {
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: true,
            inst: mach_inst,
        });
    }

    fn use_input_reg(&mut self, input: LowerInput) {

@@ -96,7 +96,7 @@
//!
//! ```

use crate::binemit::{CodeInfo, CodeOffset};
use crate::binemit::{CodeInfo, CodeOffset, Stackmap};
use crate::ir::condcodes::IntCC;
use crate::ir::{Function, Type};
use crate::result::CodegenResult;

@@ -191,6 +191,10 @@ pub trait MachInst: Clone + Debug {
    /// What is the worst-case instruction size emitted by this instruction type?
    fn worst_case_size() -> CodeOffset;

    /// What is the register class used for reference types (GC-observable pointers)? Can
    /// be dependent on compilation flags.
    fn ref_type_regclass(_flags: &Flags) -> RegClass;

    /// A label-use kind: a type that describes the types of label references that
    /// can occur in an instruction.
    type LabelUse: MachInstLabelUse;

@@ -256,9 +260,21 @@ pub enum MachTerminator<'a> {
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
    /// Persistent state carried across `emit` invocations.
    type State: Default + Clone + Debug;
    type State: MachInstEmitState<Self>;
    /// Emit the instruction.
    fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
    /// Pretty-print the instruction.
    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
}

/// A trait describing the emission state carried between MachInsts when
/// emitting a function body.
pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
    /// Create a new emission state given the ABI object.
    fn new(abi: &dyn ABIBody<I = I>) -> Self;
    /// Update the emission state before emitting an instruction that is a
    /// safepoint.
    fn pre_safepoint(&mut self, _stackmap: Stackmap) {}
}

/// The result of a `MachBackend::compile_function()` call. Contains machine
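
One way to read the new contract: the emission loop in vcode.rs (below) calls pre_safepoint with a stackmap just before emitting a safepoint instruction, and that instruction's own emit routine then consumes it. A toy mock of that handshake, with simplified stand-in types rather than the real trait bounds:

// Toy mock of the emission-state handshake for safepoints: the driver
// stashes a stackmap in the state; the next instruction's emit() takes it.
#[derive(Clone, Debug, Default)]
struct Stackmap(Vec<u32>); // offsets of live references (simplified)

#[derive(Default)]
struct EmitState {
    pending_stackmap: Option<Stackmap>,
}

impl EmitState {
    fn pre_safepoint(&mut self, stackmap: Stackmap) {
        self.pending_stackmap = Some(stackmap);
    }
    // Called from an instruction's emit(): consume the pending map, if any.
    fn take_stackmap(&mut self) -> Option<Stackmap> {
        self.pending_stackmap.take()
    }
}

fn main() {
    let mut state = EmitState::default();
    state.pre_safepoint(Stackmap(vec![0, 8]));
    assert!(state.take_stackmap().is_some());
    assert!(state.take_stackmap().is_none()); // consumed exactly once
}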

@@ -17,14 +17,15 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir::{self, SourceLoc};
use crate::ir::{self, types, SourceLoc};
use crate::machinst::*;
use crate::settings;

use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{
    BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper,
    BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
    StackmapRequestInfo,
};

use alloc::boxed::Box;

@@ -56,6 +57,9 @@ pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Do we have any ref values among our vregs?
    have_ref_values: bool,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

@@ -82,6 +86,16 @@ pub struct VCode<I: VCodeInst> {

    /// ABI object.
    abi: Box<dyn ABIBody<I = I>>,

    /// Safepoint instruction indices. Filled in post-regalloc. (Prior to
    /// regalloc, the safepoint instructions are listed in the
    /// `StackmapRequestInfo` held separately from the `VCode`.)
    safepoint_insns: Vec<InsnIndex>,

    /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s.
    /// These are used to generate actual stackmaps at emission. Filled in
    /// post-regalloc.
    safepoint_slots: Vec<Vec<SpillSlot>>,
}

/// A builder for a VCode function body. This builder is designed for the

@@ -102,6 +116,9 @@ pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    vcode: VCode<I>,

    /// In-progress stackmap-request info.
    stackmap_info: StackmapRequestInfo,

    /// Index of the last block-start in the vcode.
    block_start: InsnIndex,

@@ -115,9 +132,17 @@
impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
        let reftype_class = I::ref_type_regclass(abi.flags());
        let vcode = VCode::new(abi, block_order);
        let stackmap_info = StackmapRequestInfo {
            reftype_class,
            reftyped_vregs: vec![],
            safepoint_insns: vec![],
        };

        VCodeBuilder {
            vcode,
            stackmap_info,
            block_start: 0,
            succ_start: 0,
            cur_srcloc: SourceLoc::default(),

@@ -142,6 +167,15 @@ impl<I: VCodeInst> VCodeBuilder<I> {
                .resize(vreg.get_index() + 1, ir::types::I8);
        }
        self.vcode.vreg_types[vreg.get_index()] = ty;
        if is_reftype(ty) {
            self.stackmap_info.reftyped_vregs.push(vreg);
            self.vcode.have_ref_values = true;
        }
    }

    /// Are there any reference-typed values at all among the vregs?
    pub fn have_ref_values(&self) -> bool {
        self.vcode.have_ref_values()
    }

    /// Set the current block as the entry block.

@@ -166,7 +200,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
    }

    /// Push an instruction for the current BB and current IR inst within the BB.
    pub fn push(&mut self, insn: I) {
    pub fn push(&mut self, insn: I, is_safepoint: bool) {
        match insn.is_term() {
            MachTerminator::None | MachTerminator::Ret => {}
            MachTerminator::Uncond(target) => {

@@ -186,6 +220,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
        }
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(self.cur_srcloc);
        if is_safepoint {
            self.stackmap_info
                .safepoint_insns
                .push(InstIx::new((self.vcode.insts.len() - 1) as u32));
        }
    }

    /// Get the current source location.

@@ -198,21 +237,16 @@ impl<I: VCodeInst> VCodeBuilder<I> {
        self.cur_srcloc = srcloc;
    }

    /// Build the final VCode.
    pub fn build(self) -> VCode<I> {
        self.vcode
    /// Build the final VCode, returning the vcode itself as well as auxiliary
    /// information, such as the stackmap request information.
    pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
        // TODO: come up with an abstraction for "vcode and auxiliary data". The
        // auxiliary data needs to be separate from the vcode so that it can be
        // referenced as the vcode is mutated (e.g. by the register allocator).
        (self.vcode, self.stackmap_info)
    }
}

fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
    let v = indices
        .iter()
        .map(|iix| iix.get() as usize)
        .chain(iter::once(len))
        .collect::<Vec<usize>>();
    v.windows(2).map(|p| (p[0], p[1])).collect()
}

fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    if let Some((to, from)) = insn.is_move() {
        to.to_reg() == from

@@ -221,6 +255,11 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    }
}

/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
    ty == types::R64 || ty == types::R32
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {

@@ -228,6 +267,7 @@ impl<I: VCodeInst> VCode<I> {
            liveins: abi.liveins(),
            liveouts: abi.liveouts(),
            vreg_types: vec![],
            have_ref_values: false,
            insts: vec![],
            srclocs: vec![],
            entry: 0,

@@ -236,6 +276,8 @@ impl<I: VCodeInst> VCode<I> {
            block_succs: vec![],
            block_order,
            abi,
            safepoint_insns: vec![],
            safepoint_slots: vec![],
        }
    }

@@ -249,6 +291,11 @@ impl<I: VCodeInst> VCode<I> {
        self.vreg_types[vreg.get_index()]
    }

    /// Are there any reference-typed values at all among the vregs?
    pub fn have_ref_values(&self) -> bool {
        self.have_ref_values
    }

    /// Get the entry block.
    pub fn entry(&self) -> BlockIndex {
        self.entry

@@ -265,6 +312,11 @@ impl<I: VCodeInst> VCode<I> {
        self.abi.frame_size()
    }

    /// Inbound stack-args size.
    pub fn stack_args_size(&self) -> u32 {
        self.abi.stack_args_size()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
        let (start, end) = self.block_succ_range[block as usize];

@@ -281,17 +333,21 @@ impl<I: VCodeInst> VCode<I> {
        self.abi
            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));

        // We want to move instructions over in final block order, using the new
        // block-start map given by the regalloc.
        let block_ranges: Vec<(usize, usize)> =
            block_ranges(result.target_map.elems(), result.insns.len());
        let mut final_insns = vec![];
        let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
        let mut final_srclocs = vec![];
        let mut final_safepoint_insns = vec![];
        let mut safept_idx = 0;

        assert!(result.target_map.elems().len() == self.num_blocks());
        for block in 0..self.num_blocks() {
            let start = result.target_map.elems()[block].get() as usize;
            let end = if block == self.num_blocks() - 1 {
                result.insns.len()
            } else {
                result.target_map.elems()[block + 1].get() as usize
            };
            let block = block as BlockIndex;
            let (start, end) = block_ranges[block as usize];
            let final_start = final_insns.len() as InsnIndex;

            if block == self.entry {

@@ -333,6 +389,16 @@ impl<I: VCodeInst> VCode<I> {
                final_insns.push(insn.clone());
                final_srclocs.push(srcloc);
            }

            // Was this instruction a safepoint instruction? Add its final
            // index to the safepoint insn-index list if so.
            if safept_idx < result.new_safepoint_insns.len()
                && (result.new_safepoint_insns[safept_idx].get() as usize) == i
            {
                let idx = final_insns.len() - 1;
                final_safepoint_insns.push(idx as InsnIndex);
                safept_idx += 1;
            }
        }

        let final_end = final_insns.len() as InsnIndex;

@@ -344,6 +410,12 @@ impl<I: VCodeInst> VCode<I> {
        self.insts = final_insns;
        self.srclocs = final_srclocs;
        self.block_ranges = final_block_ranges;
        self.safepoint_insns = final_safepoint_insns;

        // Save safepoint slot-lists. These will be passed to the `EmitState`
        // for the machine backend during emission so that it can do
        // target-specific translations of slot numbers to stack offsets.
        self.safepoint_slots = result.stackmaps;
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external

@@ -353,11 +425,12 @@ impl<I: VCodeInst> VCode<I> {
        I: MachInstEmit,
    {
        let mut buffer = MachBuffer::new();
        let mut state = Default::default();
        let mut state = I::State::new(&*self.abi);

        buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.

        let flags = self.abi.flags();
        let mut safepoint_idx = 0;
        let mut cur_srcloc = None;
        for block in 0..self.num_blocks() {
            let block = block as BlockIndex;

@@ -381,6 +454,19 @@ impl<I: VCodeInst> VCode<I> {
                    cur_srcloc = Some(srcloc);
                }

                if safepoint_idx < self.safepoint_insns.len()
                    && self.safepoint_insns[safepoint_idx] == iix
                {
                    if self.safepoint_slots[safepoint_idx].len() > 0 {
                        let stackmap = self.abi.spillslots_to_stackmap(
                            &self.safepoint_slots[safepoint_idx][..],
                            &state,
                        );
                        state.pre_safepoint(stackmap);
                    }
                    safepoint_idx += 1;
                }

                self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
            }

@@ -476,13 +562,18 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
        self.abi.get_spillslot_size(regclass, ty)
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
        let ty = vreg.map(|v| self.vreg_type(v));
        self.abi.gen_spill(to_slot, from_reg, ty)
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        vreg: Option<VirtualReg>,
    ) -> I {
        let ty = vreg.map(|v| self.vreg_type(v));
        self.abi.gen_reload(to_reg, from_slot, ty)
    }

@@ -531,7 +622,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}

/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        use std::fmt::Write;

@@ -539,6 +630,8 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
        write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
        write!(&mut s, "  Entry block: {}\n", self.entry).unwrap();

        let mut state = Default::default();
        let mut safepoint_idx = 0;
        for i in 0..self.num_blocks() {
            let block = i as BlockIndex;

@@ -552,11 +645,22 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
            let (start, end) = self.block_ranges[block as usize];
            write!(&mut s, "  (instruction range: {} .. {})\n", start, end).unwrap();
            for inst in start..end {
                if safepoint_idx < self.safepoint_insns.len()
                    && self.safepoint_insns[safepoint_idx] == inst
                {
                    write!(
                        &mut s,
                        "    (safepoint: slots {:?} with EmitState {:?})\n",
                        self.safepoint_slots[safepoint_idx], state,
                    )
                    .unwrap();
                    safepoint_idx += 1;
                }
                write!(
                    &mut s,
                    "  Inst {}: {}\n",
                    inst,
                    self.insts[inst as usize].show_rru(mb_rru)
                    self.insts[inst as usize].pretty_print(mb_rru, &mut state)
                )
                .unwrap();
            }

@@ -191,3 +191,9 @@
(=> (when (udiv_imm $C $x)
          (is-power-of-two $C))
    (ushr_imm $(log2 $C) $x))

;; Remainder by a power of two -> bitwise AND with the constant decreased by one.
(=> (when (urem_imm $C $x)
          (is-power-of-two $C)
          (fits-in-native-word $C))
    (band_imm $(isub $C 1) $x))
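
The new rule relies on the identity x % c == x & (c - 1), which holds for unsigned x whenever c is a power of two. A quick check in plain Rust:

// x % c == x & (c - 1) when c is a power of two (unsigned arithmetic),
// which is what the urem_imm -> band_imm rewrite above exploits.
fn main() {
    for &c in &[1u64, 2, 8, 1 << 20] {
        assert!(c.is_power_of_two());
        for &x in &[0u64, 1, 7, 1234567, u64::MAX] {
            assert_eq!(x % c, x & (c - 1));
        }
    }
}
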
Binary file not shown.

@@ -1,6 +1,7 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{Function, InstBuilder, Opcode};
use crate::inst_predicates::is_safepoint;
use crate::ir::{Function, InstBuilder};
use crate::isa::TargetIsa;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;

@@ -51,12 +52,8 @@ pub fn emit_stackmaps(
        pos.goto_top(block);

        while let Some(inst) = pos.next_inst() {
            if pos.func.dfg[inst].opcode().is_resumable_trap() {
            if is_safepoint(&pos.func, inst) {
                insert_and_encode_safepoint(&mut pos, tracker, isa);
            } else if pos.func.dfg[inst].opcode().is_call() {
                insert_and_encode_safepoint(&mut pos, tracker, isa);
            } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint {
                panic!("safepoint instruction can only be used by the compiler!");
            }

            // Process the instruction and get rid of dead values.

@@ -1 +1 @@
{"files":{"Cargo.toml":"49bb9e126a98fa9d3d61a69ffaf24d66bab5b65c87f607fdc809a0c68ed607cb","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"0ac209bc13b1152b67c8ab3e0a87ab512d966367758cc7fa131096dbe97a1da8","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.65.0"
version = "0.66.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

@@ -1 +1 @@
{"files":{"Cargo.toml":"6c9d8563161a9803e876842482a1c34fd0ea740d5a7141fc51cec3c21ef60eec","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"52587586762dcb18c8ae39de76ef388a78b857d8fecd87b77b6a30dc8f85e1f5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"114e1ff1e5eacaf3c79946fcf441a8f525148a50e94a3f81373d4b745ac09a9f","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-frontend"
version = "0.65.0"
version = "0.66.0"
description = "Cranelift IR builder helper"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-frontend"

@@ -11,7 +11,7 @@ readme = "README.md"
edition = "2018"

[dependencies]
cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
hashbrown = { version = "0.7", optional = true }

@@ -1,11 +1,12 @@
use super::HashMap;
use crate::frontend::FunctionBuilder;
use alloc::vec::Vec;
use core::convert::TryFrom;
use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::*;
use log::debug;

type EntryIndex = u64;
type EntryIndex = u128;

/// Unlike with `br_table`, `Switch` cases may be sparse or non-0-based.
/// They emit efficient code using branches, jump tables, or a combination of both.

@@ -152,11 +153,9 @@ impl Switch {
        let left_block = bx.create_block();
        let right_block = bx.create_block();

        let should_take_right_side = bx.ins().icmp_imm(
            IntCC::UnsignedGreaterThanOrEqual,
            val,
            right[0].first_index as i64,
        );
        let first_index = right[0].first_index;
        let should_take_right_side =
            icmp_imm_u128(bx, IntCC::UnsignedGreaterThanOrEqual, val, first_index);
        bx.ins().brnz(should_take_right_side, right_block, &[]);
        bx.ins().jump(left_block, &[]);

@@ -200,7 +199,7 @@ impl Switch {
            }
            (1, _) => {
                ins_fallthrough_jump(was_branch, bx);
                let is_good_val = bx.ins().icmp_imm(IntCC::Equal, val, first_index as i64);
                let is_good_val = icmp_imm_u128(bx, IntCC::Equal, val, first_index);
                bx.ins().brnz(is_good_val, blocks[0], &[]);
            }
            (_, 0) => {

@@ -217,11 +216,8 @@ impl Switch {
            (_, _) => {
                ins_fallthrough_jump(was_branch, bx);
                let jt_block = bx.create_block();
                let is_good_val = bx.ins().icmp_imm(
                    IntCC::UnsignedGreaterThanOrEqual,
                    val,
                    first_index as i64,
                );
                let is_good_val =
                    icmp_imm_u128(bx, IntCC::UnsignedGreaterThanOrEqual, val, first_index);
                bx.ins().brnz(is_good_val, jt_block, &[]);
                bx.seal_block(jt_block);
                cases_and_jt_blocks.push((first_index, jt_block, blocks));

@@ -241,6 +237,13 @@ impl Switch {
        cases_and_jt_blocks: Vec<(EntryIndex, Block, Vec<Block>)>,
    ) {
        for (first_index, jt_block, blocks) in cases_and_jt_blocks.into_iter().rev() {
            // There are currently no 128-bit systems supported by rustc, but once there are,
            // this ensures we don't silently ignore a part of the jump table for 128-bit
            // integers on 128-bit systems.
            assert!(
                u64::try_from(blocks.len()).is_ok(),
                "Jump tables bigger than 2^64-1 are not yet supported"
            );

            let mut jt_data = JumpTableData::new();
            for block in blocks {
                jt_data.push_entry(block);

@@ -251,8 +254,33 @@ impl Switch {
            let discr = if first_index == 0 {
                val
            } else {
                bx.ins().iadd_imm(val, (first_index as i64).wrapping_neg())
                if let Ok(first_index) = u64::try_from(first_index) {
                    bx.ins().iadd_imm(val, (first_index as i64).wrapping_neg())
                } else {
                    let (lsb, msb) = (first_index as u64, (first_index >> 64) as u64);
                    let lsb = bx.ins().iconst(types::I64, lsb as i64);
                    let msb = bx.ins().iconst(types::I64, msb as i64);
                    let index = bx.ins().iconcat(lsb, msb);
                    bx.ins().isub(val, index)
                }
            };

            let discr = if bx.func.dfg.value_type(discr).bits() > 64 {
                // Check for overflow of cast to u64.
                let new_block = bx.create_block();
                let bigger_than_u64 =
                    bx.ins()
                        .icmp_imm(IntCC::UnsignedGreaterThan, discr, u64::max_value() as i64);
                bx.ins().brnz(bigger_than_u64, otherwise, &[]);
                bx.ins().jump(new_block, &[]);
                bx.switch_to_block(new_block);

                // Cast to u64, as br_table is not implemented for integers bigger than 64bits.
                bx.ins().ireduce(types::I64, discr)
            } else {
                discr
            };

            bx.ins().br_table(discr, otherwise, jump_table);
        }
    }

@@ -278,6 +306,18 @@ impl Switch {
    }
}

fn icmp_imm_u128(bx: &mut FunctionBuilder, cond: IntCC, x: Value, y: u128) -> Value {
    if let Ok(index) = u64::try_from(y) {
        bx.ins().icmp_imm(cond, x, index as i64)
    } else {
        let (lsb, msb) = (y as u64, (y >> 64) as u64);
        let lsb = bx.ins().iconst(types::I64, lsb as i64);
        let msb = bx.ins().iconst(types::I64, msb as i64);
        let index = bx.ins().iconcat(lsb, msb);
        bx.ins().icmp(cond, x, index)
    }
}
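
icmp_imm_u128 works because a u128 splits losslessly into two u64 halves, which iconcat reassembles. The underlying arithmetic, checked in plain Rust:

// A u128 immediate is carried as (low, high) u64 halves, mirroring what
// icmp_imm_u128 materializes with two iconst instructions plus iconcat.
fn split(y: u128) -> (u64, u64) {
    (y as u64, (y >> 64) as u64)
}

fn join(lsb: u64, msb: u64) -> u128 {
    ((msb as u128) << 64) | lsb as u128
}

fn main() {
    for &y in &[0u128, 1, u64::MAX as u128, u128::MAX, 1 << 100] {
        let (lsb, msb) = split(y);
        assert_eq!(join(lsb, msb), y);
    }
}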

/// This represents a contiguous range of cases to switch on.
///
/// For example 10 => block1, 11 => block2, 12 => block7 will be represented as:

@@ -440,7 +480,7 @@ block10:

    #[test]
    fn switch_min_index_value() {
        let func = setup!(0, [::core::i64::MIN as u64, 1,]);
        let func = setup!(0, [::core::i64::MIN as u64 as u128, 1,]);
        assert_eq!(
            func,
            "block0:

@@ -459,7 +499,7 @@ block3:

    #[test]
    fn switch_max_index_value() {
        let func = setup!(0, [::core::i64::MAX as u64, 1,]);
        let func = setup!(0, [::core::i64::MAX as u64 as u128, 1,]);
        assert_eq!(
            func,
            "block0:

@@ -478,7 +518,7 @@ block3:

    #[test]
    fn switch_optimal_codegen() {
        let func = setup!(0, [-1i64 as u64, 0, 1,]);
        let func = setup!(0, [-1i64 as u64 as u128, 0, 1,]);
        assert_eq!(
            func,
            " jt0 = jump_table [block2, block3]

@@ -530,4 +570,45 @@ block4:

        builder.finalize(); // Will panic if some blocks are not sealed
    }

    #[test]
    fn switch_128bit() {
        let mut func = Function::new();
        let mut func_ctx = FunctionBuilderContext::new();
        {
            let mut bx = FunctionBuilder::new(&mut func, &mut func_ctx);
            let block0 = bx.create_block();
            bx.switch_to_block(block0);
            let val = bx.ins().iconst(types::I128, 0);
            let mut switch = Switch::new();
            let block1 = bx.create_block();
            switch.set_entry(1, block1);
            let block2 = bx.create_block();
            switch.set_entry(0, block2);
            let block3 = bx.create_block();
            switch.emit(&mut bx, val, block3);
        }
        let func = func
            .to_string()
            .trim_start_matches("function u0:0() fast {\n")
            .trim_end_matches("\n}\n")
            .to_string();
        assert_eq!(
            func,
            " jt0 = jump_table [block2, block1]

block0:
    v0 = iconst.i128 0
    jump block4

block4:
    v1 = icmp_imm.i128 ugt v0, -1
    brnz v1, block3
    jump block5

block5:
    v2 = ireduce.i64 v0
    br_table v2, block3, jt0"
        );
    }
}
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"6c9d8563161a9803e876842482a1c34fd0ea740d5a7141fc51cec3c21ef60eec","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"e8d525ae48f967ebda012981b10dd11fbb46d9223fd95d1e3409da528851fcf7","src/environ/dummy.rs":"922d029491a9f5c55d22fcc9fbeae9e8c6721fa6556527785494f1351874e9f3","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"026a145c1cf9cd25c77e7ea8e0bb43739769dfc4693fcf827f6cdb79acf398a1","src/func_translator.rs":"b4391a11df5c401c9ddd26698105548b7a861c8deb5f84215f0b88cba5327362","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"47b575f0edbe8a2a3334261742870ce7424e13d91f8980609f9c963a2811e1f6","src/sections_translator.rs":"ebd08548e048c7f792da45aa8d710e7d6f047e9197bc86260743c97d00dd99f6","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"0a2a53a7f60a5192661ce4c95ee9bd6775e1eb7d32647e1c6e026b0f8849cd2c","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
|
||||
{"files":{"Cargo.toml":"34ad61b3a40b5bfee68d575e749314cf8395484c0484fd40d39a9bd1f46d3e14","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"77d407a26746381c1f433e0d13e758bfd2864936fc156c5eb5114a7dc146a2f1","src/environ/dummy.rs":"e9b06d1db4d25ab622d133ca927ec524a86d90d49eb67862dea0db734a0eadc4","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"b2ead10ea1f346d6fe2e4a5afc656754f0783fae98a3937b42cc106ad9e5eace","src/func_translator.rs":"48ee25da11063743459f9e9407512413075265e67713c6f5ab733798be2bf19d","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"def8b0853f1e802faf57b38e90016577887a0698a5abce5b3cee4cd67e07ecf0","src/sections_translator.rs":"8bbf6cf774076c88f176296065b392ff21ed512be806629cce5d275271eee3a8","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"7ca3cb06b4481bc3ae74697fbcd437aea1d851eaa3cfe18cc013a4af43728957","src/translation_utils.rs":"69f20c47ea22f0badd21a6187b5f9764252a00d19643a7e3e691797a9fe34f1b","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
|
|
@@ -1,6 +1,6 @@
 [package]
 name = "cranelift-wasm"
-version = "0.65.0"
+version = "0.66.0"
 authors = ["The Cranelift Project Developers"]
 description = "Translator from WebAssembly to Cranelift IR"
 documentation = "https://docs.rs/cranelift-wasm"

@@ -12,10 +12,10 @@ keywords = ["webassembly", "wasm"]
 edition = "2018"

 [dependencies]
-wasmparser = { version = "0.58.0", default-features = false }
-cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false }
-cranelift-entity = { path = "../entity", version = "0.65.0" }
-cranelift-frontend = { path = "../frontend", version = "0.65.0", default-features = false }
+wasmparser = { version = "0.59.0", default-features = false }
+cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false }
+cranelift-entity = { path = "../entity", version = "0.66.0" }
+cranelift-frontend = { path = "../frontend", version = "0.66.0", default-features = false }
 hashbrown = { version = "0.7", optional = true }
 log = { version = "0.4.6", default-features = false }
 serde = { version = "1.0.94", features = ["derive"], optional = true }

@@ -25,7 +25,7 @@ thiserror = "1.0.4"
 wat = "1.0.18"
 target-lexicon = "0.10"
 # Enable the riscv feature for cranelift-codegen, as some tests require it
-cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false, features = ["riscv"] }
+cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false, features = ["riscv"] }

 [features]
 default = ["std"]
@@ -30,6 +30,7 @@ use crate::translation_utils::{
 };
 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex};
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use core::{i32, u32};
 use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
 use cranelift_codegen::ir::immediates::Offset32;

@@ -1039,8 +1040,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         Operator::F32Le | Operator::F64Le => {
             translate_fcmp(FloatCC::LessThanOrEqual, builder, state)
         }
-        Operator::RefNull { ty } => state.push1(environ.translate_ref_null(builder.cursor(), *ty)?),
-        Operator::RefIsNull { ty: _ } => {
+        Operator::RefNull { ty } => {
+            state.push1(environ.translate_ref_null(builder.cursor(), (*ty).try_into()?)?)
+        }
+        Operator::RefIsNull => {
            let value = state.pop1();
            state.push1(environ.translate_ref_is_null(builder.cursor(), value)?);
        }
@@ -1559,22 +1562,59 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
         }
-        Operator::I32x4TruncSatF32x4U
-        | Operator::I8x16NarrowI16x8S { .. }
-        | Operator::I8x16NarrowI16x8U { .. }
-        | Operator::I16x8NarrowI32x4S { .. }
-        | Operator::I16x8NarrowI32x4U { .. }
-        | Operator::I16x8WidenLowI8x16S { .. }
-        | Operator::I16x8WidenHighI8x16S { .. }
-        | Operator::I16x8WidenLowI8x16U { .. }
-        | Operator::I16x8WidenHighI8x16U { .. }
-        | Operator::I32x4WidenLowI16x8S { .. }
-        | Operator::I32x4WidenHighI16x8S { .. }
-        | Operator::I32x4WidenLowI16x8U { .. }
-        | Operator::I32x4WidenHighI16x8U { .. }
-        | Operator::I8x16Bitmask
-        | Operator::I16x8Bitmask
-        | Operator::I32x4Bitmask => {
+        Operator::I32x4TruncSatF32x4U => {
+            let a = pop1_with_bitcast(state, F32X4, builder);
+            state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
+        }
+        Operator::I8x16NarrowI16x8S => {
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().snarrow(a, b))
+        }
+        Operator::I16x8NarrowI32x4S => {
+            let (a, b) = pop2_with_bitcast(state, I32X4, builder);
+            state.push1(builder.ins().snarrow(a, b))
+        }
+        Operator::I8x16NarrowI16x8U => {
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().unarrow(a, b))
+        }
+        Operator::I16x8NarrowI32x4U => {
+            let (a, b) = pop2_with_bitcast(state, I32X4, builder);
+            state.push1(builder.ins().unarrow(a, b))
+        }
+        Operator::I16x8WidenLowI8x16S => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().swiden_low(a))
+        }
+        Operator::I16x8WidenHighI8x16S => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().swiden_high(a))
+        }
+        Operator::I16x8WidenLowI8x16U => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().uwiden_low(a))
+        }
+        Operator::I16x8WidenHighI8x16U => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().uwiden_high(a))
+        }
+        Operator::I32x4WidenLowI16x8S => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().swiden_low(a))
+        }
+        Operator::I32x4WidenHighI16x8S => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().swiden_high(a))
+        }
+        Operator::I32x4WidenLowI16x8U => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().uwiden_low(a))
+        }
+        Operator::I32x4WidenHighI16x8U => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().uwiden_high(a))
+        }
+        Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => {
             return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
         }
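For readers not steeped in Cranelift's vector instructions: snarrow/unarrow halve the lane width with signed or unsigned saturation, and the swiden_*/uwiden_* family doubles lane width by sign- or zero-extending one half of the input vector. A lane-wise scalar sketch of those assumed semantics (illustration only, not the Cranelift implementation):

// Signed saturating narrow (one i16 lane of `snarrow` on I16X8 -> i8).
fn snarrow_lane(x: i16) -> i8 {
    x.clamp(i8::MIN as i16, i8::MAX as i16) as i8
}

// Unsigned saturating narrow of a signed input (one lane of `unarrow`).
fn unarrow_lane(x: i16) -> u8 {
    x.clamp(0, u8::MAX as i16) as u8
}

// `swiden_low` on I8X16: sign-extend the low eight lanes to i16.
fn swiden_low_i8x16(v: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for (i, lane) in out.iter_mut().enumerate() {
        *lane = v[i] as i16;
    }
    out
}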
@@ -547,7 +547,7 @@ impl TargetEnvironment for DummyEnvironment {
 }

 impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
-    fn declare_signature(&mut self, _wasm: &WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
+    fn declare_signature(&mut self, _wasm: WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
         self.info.signatures.push(sig);
         Ok(())
     }
@@ -12,22 +12,89 @@ use crate::translation_utils::{
     Table, TableIndex,
 };
-use core::convert::From;
+use core::convert::TryFrom;
 use cranelift_codegen::cursor::FuncCursor;
 use cranelift_codegen::ir::immediates::Offset32;
 use cranelift_codegen::ir::{self, InstBuilder};
 use cranelift_codegen::isa::TargetFrontendConfig;
 use cranelift_frontend::FunctionBuilder;
 #[cfg(feature = "enable-serde")]
 use serde::{Deserialize, Serialize};
+use std::boxed::Box;
+use std::string::ToString;
 use thiserror::Error;
 use wasmparser::BinaryReaderError;
 use wasmparser::Operator;

-// Re-export `wasmparser`'s function and value types so that consumers can
-// associate this the original Wasm signature with each compiled function. This
-// is often necessary because while each Wasm signature gets compiled down into
-// a single native signature, multiple Wasm signatures might compile down into
-// the same native signature.
-pub use wasmparser::{FuncType as WasmFuncType, Type as WasmType};
+/// WebAssembly value type -- equivalent of `wasmparser`'s Type.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum WasmType {
+    /// I32 type
+    I32,
+    /// I64 type
+    I64,
+    /// F32 type
+    F32,
+    /// F64 type
+    F64,
+    /// V128 type
+    V128,
+    /// FuncRef type
+    FuncRef,
+    /// ExternRef type
+    ExternRef,
+}
+
+impl TryFrom<wasmparser::Type> for WasmType {
+    type Error = WasmError;
+    fn try_from(ty: wasmparser::Type) -> Result<Self, Self::Error> {
+        use wasmparser::Type::*;
+        match ty {
+            I32 => Ok(WasmType::I32),
+            I64 => Ok(WasmType::I64),
+            F32 => Ok(WasmType::F32),
+            F64 => Ok(WasmType::F64),
+            V128 => Ok(WasmType::V128),
+            FuncRef => Ok(WasmType::FuncRef),
+            ExternRef => Ok(WasmType::ExternRef),
+            EmptyBlockType | Func => Err(WasmError::InvalidWebAssembly {
+                message: "unexpected value type".to_string(),
+                offset: 0,
+            }),
+        }
+    }
+}
+
+/// WebAssembly function type -- equivalent of `wasmparser`'s FuncType.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct WasmFuncType {
+    /// Function params types.
+    pub params: Box<[WasmType]>,
+    /// Returns params types.
+    pub returns: Box<[WasmType]>,
+}
+
+impl TryFrom<wasmparser::FuncType> for WasmFuncType {
+    type Error = WasmError;
+    fn try_from(ty: wasmparser::FuncType) -> Result<Self, Self::Error> {
+        Ok(Self {
+            params: ty
+                .params
+                .into_vec()
+                .into_iter()
+                .map(WasmType::try_from)
+                .collect::<Result<_, Self::Error>>()?,
+            returns: ty
+                .returns
+                .into_vec()
+                .into_iter()
+                .map(WasmType::try_from)
+                .collect::<Result<_, Self::Error>>()?,
+        })
+    }
+}

 /// The value of a WebAssembly global variable.
 #[derive(Clone, Copy)]

@@ -524,7 +591,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment {
     /// Declares a function signature to the environment.
     fn declare_signature(
         &mut self,
-        wasm_func_type: &WasmFuncType,
+        wasm_func_type: WasmFuncType,
         sig: ir::Signature,
     ) -> WasmResult<()>;
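To make the new conversion path concrete: a signature coming out of wasmparser is now converted up front, with the non-value types surfacing as a WasmError. A minimal sketch, assuming the TryFrom impls above and cranelift-wasm's public WasmFuncType/WasmError types (the function name is hypothetical):

use core::convert::TryInto;

// Convert a parsed signature before handing it to declare_signature.
// EmptyBlockType/Func in params or returns becomes
// WasmError::InvalidWebAssembly, per the impl above.
fn lower_sig(ty: wasmparser::FuncType) -> Result<WasmFuncType, WasmError> {
    ty.try_into()
}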
@@ -9,6 +9,7 @@ use crate::environ::{FuncEnvironment, ReturnMode, WasmResult};
 use crate::state::{FuncTranslationState, ModuleTranslationState};
 use crate::translation_utils::get_vmctx_value_label;
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use cranelift_codegen::entity::EntityRef;
 use cranelift_codegen::ir::{self, Block, InstBuilder, ValueLabel};
 use cranelift_codegen::timing;

@@ -196,7 +197,9 @@ fn declare_locals<FE: FuncEnvironment + ?Sized>(
             let constant_handle = builder.func.dfg.constants.insert([0; 16].to_vec().into());
             builder.ins().vconst(ir::types::I8X16, constant_handle)
         }
-        ExternRef | FuncRef => environ.translate_ref_null(builder.cursor(), wasm_type)?,
+        ExternRef | FuncRef => {
+            environ.translate_ref_null(builder.cursor(), wasm_type.try_into()?)?
+        }
         ty => return Err(wasm_unsupported!("unsupported local type {:?}", ty)),
     };
@@ -2,13 +2,13 @@
 //! to deal with each part of it.
 use crate::environ::{ModuleEnvironment, WasmResult};
 use crate::sections_translator::{
-    parse_code_section, parse_data_section, parse_element_section, parse_export_section,
-    parse_function_section, parse_global_section, parse_import_section, parse_memory_section,
-    parse_name_section, parse_start_section, parse_table_section, parse_type_section,
+    parse_data_section, parse_element_section, parse_export_section, parse_function_section,
+    parse_global_section, parse_import_section, parse_memory_section, parse_name_section,
+    parse_start_section, parse_table_section, parse_type_section,
 };
 use crate::state::ModuleTranslationState;
 use cranelift_codegen::timing;
-use wasmparser::{CustomSectionContent, ModuleReader, SectionContent};
+use wasmparser::{NameSectionReader, Parser, Payload};

 /// Translate a sequence of bytes forming a valid Wasm binary into a list of valid Cranelift IR
 /// [`Function`](cranelift_codegen::ir::Function).

@@ -17,80 +17,85 @@ pub fn translate_module<'data>(
     environ: &mut dyn ModuleEnvironment<'data>,
 ) -> WasmResult<ModuleTranslationState> {
     let _tt = timing::wasm_translate_module();
-    let mut reader = ModuleReader::new(data)?;
     let mut module_translation_state = ModuleTranslationState::new();

-    while !reader.eof() {
-        let section = reader.read()?;
-        match section.content()? {
-            SectionContent::Type(types) => {
+    for payload in Parser::new(0).parse_all(data) {
+        match payload? {
+            Payload::Version { .. } | Payload::End => {}
+
+            Payload::TypeSection(types) => {
                 parse_type_section(types, &mut module_translation_state, environ)?;
             }

-            SectionContent::Import(imports) => {
+            Payload::ImportSection(imports) => {
                 parse_import_section(imports, environ)?;
             }

-            SectionContent::Function(functions) => {
+            Payload::FunctionSection(functions) => {
                 parse_function_section(functions, environ)?;
             }

-            SectionContent::Table(tables) => {
+            Payload::TableSection(tables) => {
                 parse_table_section(tables, environ)?;
             }

-            SectionContent::Memory(memories) => {
+            Payload::MemorySection(memories) => {
                 parse_memory_section(memories, environ)?;
             }

-            SectionContent::Global(globals) => {
+            Payload::GlobalSection(globals) => {
                 parse_global_section(globals, environ)?;
             }

-            SectionContent::Export(exports) => {
+            Payload::ExportSection(exports) => {
                 parse_export_section(exports, environ)?;
             }

-            SectionContent::Start(start) => {
-                parse_start_section(start, environ)?;
+            Payload::StartSection { func, .. } => {
+                parse_start_section(func, environ)?;
             }

-            SectionContent::Element(elements) => {
+            Payload::ElementSection(elements) => {
                 parse_element_section(elements, environ)?;
             }

-            SectionContent::Code(code) => {
-                parse_code_section(code, &module_translation_state, environ)?;
+            Payload::CodeSectionStart { .. } => {}
+            Payload::CodeSectionEntry(code) => {
+                let mut code = code.get_binary_reader();
+                let size = code.bytes_remaining();
+                let offset = code.original_position();
+                environ.define_function_body(
+                    &module_translation_state,
+                    code.read_bytes(size)?,
+                    offset,
+                )?;
             }

-            SectionContent::Data(data) => {
+            Payload::DataSection(data) => {
                 parse_data_section(data, environ)?;
             }

-            SectionContent::DataCount(count) => {
+            Payload::DataCountSection { count, .. } => {
                 environ.reserve_passive_data(count)?;
             }

-            SectionContent::Module(_)
-            | SectionContent::ModuleCode(_)
-            | SectionContent::Instance(_)
-            | SectionContent::Alias(_) => unimplemented!("module linking not implemented yet"),
+            Payload::ModuleSection(_)
+            | Payload::InstanceSection(_)
+            | Payload::AliasSection(_)
+            | Payload::ModuleCodeSectionStart { .. }
+            | Payload::ModuleCodeSectionEntry { .. } => {
+                unimplemented!("module linking not implemented yet")
+            }

-            SectionContent::Custom {
-                name,
-                binary,
-                content,
-            } => match content {
-                Some(CustomSectionContent::Name(names)) => {
-                    parse_name_section(names, environ)?;
-                }
-                _ => {
-                    let mut reader = binary.clone();
-                    let len = reader.bytes_remaining();
-                    let payload = reader.read_bytes(len)?;
-                    environ.custom_section(name, payload)?;
-                }
-            },
+            Payload::CustomSection {
+                name: "name",
+                data,
+                data_offset,
+            } => parse_name_section(NameSectionReader::new(data, data_offset)?, environ)?,
+
+            Payload::CustomSection { name, data, .. } => environ.custom_section(name, data)?,
+
+            Payload::UnknownSection { .. } => unreachable!(),
         }
     }
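The old ModuleReader pull loop becomes wasmparser 0.59's streaming Parser, which yields one Payload per section (and one per function body). A self-contained sketch of that iteration style, assuming the 0.59 API exactly as used in the diff above:

use wasmparser::{BinaryReaderError, Parser, Payload};

// Count function bodies by streaming payloads, mirroring the match above.
fn count_function_bodies(data: &[u8]) -> Result<usize, BinaryReaderError> {
    let mut n = 0;
    for payload in Parser::new(0).parse_all(data) {
        if let Payload::CodeSectionEntry(_) = payload? {
            n += 1;
        }
    }
    Ok(n)
}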
@@ -15,6 +15,7 @@ use crate::translation_utils::{
 };
 use crate::{wasm_unsupported, HashMap};
 use core::convert::TryFrom;
+use core::convert::TryInto;
 use cranelift_codegen::ir::immediates::V128Imm;
 use cranelift_codegen::ir::{self, AbiParam, Signature};
 use cranelift_entity::packed_option::ReservedValue;

@@ -22,11 +23,11 @@ use cranelift_entity::EntityRef;
 use std::boxed::Box;
 use std::vec::Vec;
 use wasmparser::{
-    self, CodeSectionReader, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems,
-    ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind,
-    FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType,
-    ImportSectionReader, MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader,
-    Operator, TableSectionReader, Type, TypeDef, TypeSectionReader,
+    self, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems, ElementKind,
+    ElementSectionReader, Export, ExportSectionReader, ExternalKind, FunctionSectionReader,
+    GlobalSectionReader, GlobalType, ImportSectionEntryType, ImportSectionReader,
+    MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader, Operator,
+    TableSectionReader, Type, TypeDef, TypeSectionReader,
 };

 /// Parses the Type section of the wasm module.

@@ -53,7 +54,7 @@ pub fn parse_type_section(
             .expect("only numeric types are supported in function signatures");
         AbiParam::new(cret_arg)
     }));
-    environ.declare_signature(&wasm_func_ty, sig)?;
+    environ.declare_signature(wasm_func_ty.clone().try_into()?, sig)?;
     module_translation_state
         .wasm_types
         .push((wasm_func_ty.params, wasm_func_ty.returns));

@@ -104,7 +105,7 @@ pub fn parse_import_section<'data>(
             ImportSectionEntryType::Global(ref ty) => {
                 environ.declare_global_import(
                     Global {
-                        wasm_ty: ty.content_type,
+                        wasm_ty: ty.content_type.try_into()?,
                         ty: type_to_type(ty.content_type, environ).unwrap(),
                         mutability: ty.mutable,
                         initializer: GlobalInit::Import,

@@ -116,7 +117,7 @@ pub fn parse_import_section<'data>(
             ImportSectionEntryType::Table(ref tab) => {
                 environ.declare_table_import(
                     Table {
-                        wasm_ty: tab.element_type,
+                        wasm_ty: tab.element_type.try_into()?,
                         ty: match tabletype_to_type(tab.element_type, environ)? {
                             Some(t) => TableElementType::Val(t),
                             None => TableElementType::Func,

@@ -166,7 +167,7 @@ pub fn parse_table_section(
     for entry in tables {
         let table = entry?;
         environ.declare_table(Table {
-            wasm_ty: table.element_type,
+            wasm_ty: table.element_type.try_into()?,
            ty: match tabletype_to_type(table.element_type, environ)? {
                Some(t) => TableElementType::Val(t),
                None => TableElementType::Func,

@@ -237,7 +238,7 @@ pub fn parse_global_section(
            }
        };
        let global = Global {
-            wasm_ty: content_type,
+            wasm_ty: content_type.try_into()?,
            ty: type_to_type(content_type, environ).unwrap(),
            mutability: mutable,
            initializer,

@@ -357,21 +358,6 @@ pub fn parse_element_section<'data>(
     Ok(())
 }

-/// Parses the Code section of the wasm module.
-pub fn parse_code_section<'data>(
-    code: CodeSectionReader<'data>,
-    module_translation_state: &ModuleTranslationState,
-    environ: &mut dyn ModuleEnvironment<'data>,
-) -> WasmResult<()> {
-    for body in code {
-        let mut reader = body?.get_binary_reader();
-        let size = reader.bytes_remaining();
-        let offset = reader.original_position();
-        environ.define_function_body(module_translation_state, reader.read_bytes(size)?, offset)?;
-    }
-    Ok(())
-}
-
 /// Parses the Data section of the wasm module.
 pub fn parse_data_section<'data>(
     data: DataSectionReader<'data>,
@@ -30,6 +30,7 @@ fn cranelift_to_wasmparser_type(ty: Type) -> WasmResult<wasmparser::Type> {
         types::I64 => wasmparser::Type::I64,
         types::F32 => wasmparser::Type::F32,
         types::F64 => wasmparser::Type::F64,
+        types::R32 | types::R64 => wasmparser::Type::ExternRef,
         _ => {
             return Err(WasmError::Unsupported(format!(
                 "Cannot convert Cranelift type to Wasm signature: {:?}",
@@ -2,6 +2,7 @@
 use crate::environ::{TargetEnvironment, WasmResult, WasmType};
 use crate::state::ModuleTranslationState;
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use core::u32;
 use cranelift_codegen::entity::entity_impl;
 use cranelift_codegen::ir;

@@ -39,31 +40,37 @@ entity_impl!(DefinedGlobalIndex);

 /// Index type of a table (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct TableIndex(u32);
 entity_impl!(TableIndex);

 /// Index type of a global variable (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct GlobalIndex(u32);
 entity_impl!(GlobalIndex);

 /// Index type of a linear memory (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct MemoryIndex(u32);
 entity_impl!(MemoryIndex);

 /// Index type of a signature (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct SignatureIndex(u32);
 entity_impl!(SignatureIndex);

 /// Index type of a passive data segment inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct DataIndex(u32);
 entity_impl!(DataIndex);

 /// Index type of a passive element segment inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct ElemIndex(u32);
 entity_impl!(ElemIndex);

@@ -75,6 +82,7 @@ entity_impl!(ElemIndex);
 /// Wasm `i64` and a `funcref` might be represented with a Cranelift `i64` on
 /// 64-bit architectures, and when GC is not required for func refs.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Global {
     /// The Wasm type of the value stored in the global.
     pub wasm_ty: crate::WasmType,

@@ -88,6 +96,7 @@ pub struct Global {

 /// Globals are initialized via the `const` operators or by referring to another import.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum GlobalInit {
     /// An `i32.const`.
     I32Const(i32),

@@ -111,6 +120,7 @@ pub enum GlobalInit {

 /// WebAssembly table.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Table {
     /// The table elements' Wasm type.
     pub wasm_ty: WasmType,

@@ -124,6 +134,7 @@ pub struct Table {

 /// WebAssembly table element. Can be a function or a scalar type.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum TableElementType {
     /// A scalar type.
     Val(ir::Type),

@@ -133,6 +144,7 @@ pub enum TableElementType {

 /// WebAssembly linear memory.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Memory {
     /// The minimum number of pages in the memory.
     pub minimum: u32,

@@ -153,7 +165,9 @@ pub fn type_to_type<PE: TargetEnvironment + ?Sized>(
         wasmparser::Type::F32 => Ok(ir::types::F32),
         wasmparser::Type::F64 => Ok(ir::types::F64),
         wasmparser::Type::V128 => Ok(ir::types::I8X16),
-        wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => Ok(environ.reference_type(ty)),
+        wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => {
+            Ok(environ.reference_type(ty.try_into()?))
+        }
         ty => Err(wasm_unsupported!("type_to_type: wasm type {:?}", ty)),
     }
 }

@@ -170,7 +184,7 @@ pub fn tabletype_to_type<PE: TargetEnvironment + ?Sized>(
         wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
         wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
         wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
-        wasmparser::Type::ExternRef => Ok(Some(environ.reference_type(ty))),
+        wasmparser::Type::ExternRef => Ok(Some(environ.reference_type(ty.try_into()?))),
         wasmparser::Type::FuncRef => Ok(None),
         ty => Err(wasm_unsupported!(
             "tabletype_to_type: table wasm type {:?}",

@@ -226,7 +240,7 @@ pub fn block_with_params<PE: TargetEnvironment + ?Sized>(
             builder.append_block_param(block, ir::types::F64);
         }
         wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => {
-            builder.append_block_param(block, environ.reference_type(*ty));
+            builder.append_block_param(block, environ.reference_type((*ty).try_into()?));
         }
         wasmparser::Type::V128 => {
             builder.append_block_param(block, ir::types::I8X16);
@@ -1 +1 @@
-{"files":{"Cargo.toml":"9fca7a43287a7b615baacc0a1a6ffcb5778793feeeeb37a3e2329d8494bd0744","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","src/analysis_control_flow.rs":"82e71cd345f1d0eb93db23e28048f4b1b3f0dfaf729220df561477be2667a26f","src/analysis_data_flow.rs":"1edb01adc754330a56b7d2c9b2fd28243f9d42a77b828dfbf069a16b6ae28b0d","src/analysis_main.rs":"fda064ae61d15b4f336ab9f4b7fb55fc51ed284f311024ec7b7dec002933a83b","src/avl_tree.rs":"5124db9c746d9c60de4e2cea91f99d68838e7f80585b69cdfea9c53b8d7c6275","src/bt_coalescing_analysis.rs":"7f1c4ac1e844bdf255b63764373a1e71b942fbe21aea65f341d3c0175a2b6805","src/bt_commitment_map.rs":"4209822d3bfa15295ec345616c5da8256603fe7460f4cffa8176f236cc1ddebe","src/bt_main.rs":"f9b745f07dfa4cc892e6ebd06bd6f5e4ffce1aac3ab0585283185bb2bf2b4e58","src/bt_spillslot_allocator.rs":"3939ebe38ddb324f5bcac0da708e0f7d6d91a5735f3342a86f93c6ab551b2731","src/bt_vlr_priority_queue.rs":"082d9ede8c1e7ec3129356450b42056e090f1dae77688589f71be7055eff5333","src/checker.rs":"b767b5e2e014eeddc248cbd76e55f3d45db92a60a280d09e0923bb129f720c1c","src/data_structures.rs":"eca398adfe3e8e7c3c1482c856bbbdaef3fc1f042008a0031171f8324b2e2320","src/inst_stream.rs":"1ade5e2685a29c00b9d8a82c0e295666c68b6b16d28c23b3f43239cc2997f0bf","src/lib.rs":"683a8c53098bc246049a2d55454c4ec86639e692a3950b298e116a80e2493603","src/linear_scan/analysis.rs":"babd475dc0b703e49babc27913676b4ad3f277397f3d2f751cd4fb3743c00ec8","src/linear_scan/assign_registers.rs":"c4e75bc01f447ee8abeb7420172550d114c83813f8b4e81fce1d10c423955088","src/linear_scan/mod.rs":"b238d4c7ef2e71f79b49cdb9c97df784e40e7e659347e3895d4e32244891c54d","src/linear_scan/resolve_moves.rs":"502ee5f515da69c6368a0db50165e9351a2064f226c202f0ac3e1008349f1592","src/reg_maps.rs":"87ede67e8e550fd8e3aa4d880989b2bdf6f21b5668c5fb657039627aa8c1c3d3","src/snapshot.rs":"c434ad0477ef66c2bc6a8ae1ed49df544c5c9185aeb6149fbc682b94aef8aedd","src/sparse_set.rs":"4f0dd9552c1eb190441ed4f1a84aa6861ef44ab825b2df551bd721d69d36c53e","src/union_find.rs":"78f5863cd61ad8c6a21b1b1ccc0996d9c19b469ff8ebd101a1e9f0a3e10f1b7c"},"package":"7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95"}
+{"files":{"Cargo.toml":"c0be0c0ada75a2b5625764ddea1e8abf7feb3048c3eddba825b86d21401c8af2","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","src/analysis_control_flow.rs":"82e71cd345f1d0eb93db23e28048f4b1b3f0dfaf729220df561477be2667a26f","src/analysis_data_flow.rs":"e6caeabd27281dffd5c3e5ed7825c5fa36e881dd3b63e7a0a37b429e207fc1e5","src/analysis_main.rs":"49acbb4ca0ee234d40c70f190341d39894aff1c60449e4258cdcf8c31023168f","src/analysis_reftypes.rs":"efe925c5ebd76c97c97675d998d02151e9e7fcf6473eaa2e41f8dabd369c2c5e","src/avl_tree.rs":"2e48fe5700273f9c3838a69856b341cc993169987ace0580d4de295953449b9b","src/bt_coalescing_analysis.rs":"aed531df612f6f2297ef11ada850091006c72a7a15ea44be20e6e8132cff61a0","src/bt_commitment_map.rs":"2dabf2896b7b2c96d9cf9bf26cdd397765b468772bf44fbb0337eff85433d0f7","src/bt_main.rs":"ee6805a830fbf9c9c4ccce0d9236c8fdf4aab2a1cbcab623187442858b7e7194","src/bt_spillslot_allocator.rs":"3534171c6e156c3d5a948036430a80a2ca7ba728a3e4b33e598479740cffe0e3","src/bt_vlr_priority_queue.rs":"082d9ede8c1e7ec3129356450b42056e090f1dae77688589f71be7055eff5333","src/checker.rs":"65d3e2e5d3e4e4dc05844bbf4e9515bdc21d8d324f2923e4ec6ae2ba7602dee3","src/data_structures.rs":"ffa1d22e85a325d8683dcd11b5b6c6dbd4cd3388df542488a684b154b639d234","src/inst_stream.rs":"9b453924b228bd5d137877769ad6ac892eb22e642d52822f2aeeeb401ac9e386","src/lib.rs":"aa07a5e33bb2b5d6599bca5a3f10a964bb3b62d0a8d52db46b6b4f3ae75f148a","src/linear_scan/analysis.rs":"babd475dc0b703e49babc27913676b4ad3f277397f3d2f751cd4fb3743c00ec8","src/linear_scan/assign_registers.rs":"c4e75bc01f447ee8abeb7420172550d114c83813f8b4e81fce1d10c423955088","src/linear_scan/mod.rs":"dcccdff0b534865776aa807abb268b2b3e008c2b2ac5dacb44eaf1f47e00c472","src/linear_scan/resolve_moves.rs":"2c51e4d6a096454db79090b078780ee9938eae4dd1fe0d32103bdc4e56e4e3c8","src/reg_maps.rs":"87ede67e8e550fd8e3aa4d880989b2bdf6f21b5668c5fb657039627aa8c1c3d3","src/snapshot.rs":"62ff934004a93697d48049e0dae1b99717c56ca35154d3a12d6ba22e47fe0d16","src/sparse_set.rs":"4f0dd9552c1eb190441ed4f1a84aa6861ef44ab825b2df551bd721d69d36c53e","src/union_find.rs":"78f5863cd61ad8c6a21b1b1ccc0996d9c19b469ff8ebd101a1e9f0a3e10f1b7c"},"package":"3598bed0895fe0f72a9e0b00ef9e3a3c8af978a8401b2f2046dec5927de6364a"}
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "regalloc"
-version = "0.0.26"
+version = "0.0.28"
 authors = ["The Regalloc.rs Developers"]
 description = "Modular register allocation algorithms"
 license = "Apache-2.0 WITH LLVM-exception"
@@ -7,10 +7,11 @@ use std::fmt;

 use crate::analysis_control_flow::CFGInfo;
 use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, Point, Queue, RangeFrag, RangeFragIx, RangeFragKind,
-    RangeFragMetrics, RealRange, RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegSets,
-    RegUsageCollector, RegVecBounds, RegVecs, RegVecsAndBounds, SortedRangeFragIxs,
-    SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
+    BlockIx, InstIx, InstPoint, MoveInfo, MoveInfoElem, Point, Queue, RangeFrag, RangeFragIx,
+    RangeFragKind, RangeFragMetrics, RangeId, RealRange, RealRangeIx, RealReg, RealRegUniverse,
+    Reg, RegClass, RegSets, RegToRangesMaps, RegUsageCollector, RegVecBounds, RegVecs,
+    RegVecsAndBounds, SortedRangeFragIxs, SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange,
+    VirtualRangeIx, VirtualReg,
 };
 use crate::sparse_set::SparseSet;
 use crate::union_find::{ToFromU32, UnionFind};

@@ -1154,9 +1155,7 @@ pub fn get_range_frags<F: Function>(
     assert!(rvb.is_sanitized());

     // In order that we can work with unified-reg-indices (see comments above), we need to know
-    // (1) how many virtual regs there are and (2) the `RegClass` for each. That info is
-    // collected in a single pass here. In principle regalloc.rs's user could tell us (1), but
-    // as yet the interface does not make that possible.
+    // the `RegClass` for each virtual register. That info is collected here.
     let mut vreg_classes = vec![RegClass::INVALID; func.get_num_vregs()];
     for r in rvb
         .vecs

@@ -1458,6 +1457,7 @@ fn create_and_add_range(
             vreg: reg.to_virtual_reg(),
             rreg: None,
             sorted_frags,
+            is_ref: false, // analysis_reftypes.rs may later change this
             size,
             total_cost,
             spill_cost,

@@ -1466,6 +1466,7 @@ fn create_and_add_range(
         result_real.push(RealRange {
             rreg: reg.to_real_reg(),
             sorted_frags: sorted_frag_ixs,
+            is_ref: false, // analysis_reftypes.rs may later change this
         });
     }
 }

@@ -1806,3 +1807,154 @@ pub fn merge_range_frags(

     (result_real, result_virtual)
 }
+
+//=============================================================================
+// Auxiliary activities that mostly fall under the category "dataflow analysis", but are not
+// part of the main dataflow analysis pipeline.
+
+// Dataflow and liveness together create vectors of VirtualRanges and RealRanges. These define
+// (amongst other things) mappings from VirtualRanges to VirtualRegs and from RealRanges to
+// RealRegs. However, we often need the inverse mappings: from VirtualRegs to (sets of)
+// VirtualRanges and from RealRegs to (sets of) RealRanges. This function computes those
+// inverse mappings. They are used by BT's coalescing analysis, and for the dataflow analysis
+// that supports reftype handling.
+#[inline(never)]
+pub fn compute_reg_to_ranges_maps<F: Function>(
+    func: &F,
+    univ: &RealRegUniverse,
+    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+) -> RegToRangesMaps {
+    // We have in hand the virtual live ranges. Each of these carries its
+    // associated vreg. So in effect we have a VLR -> VReg mapping. We now
+    // invert that, so as to generate a mapping from VRegs to their containing
+    // VLRs.
+    //
+    // Note that multiple VLRs may map to the same VReg. So the inverse mapping
+    // will actually be from VRegs to a set of VLRs. In most cases, we expect
+    // the virtual-registerised-code given to this allocator to be derived from
+    // SSA, in which case each VReg will have only one VLR. So in this case,
+    // the cost of first creating the mapping, and then looking up all the VRegs
+    // in moves in it, will have cost linear in the size of the input function.
+    //
+    // NB re the SmallVec. That has set semantics (no dups).
+    let mut vreg_to_vlrs_map = vec![SmallVec::<[VirtualRangeIx; 3]>::new(); func.get_num_vregs()];
+    for (vlr, n) in vlr_env.iter().zip(0..) {
+        let vlrix = VirtualRangeIx::new(n);
+        let vreg: VirtualReg = vlr.vreg;
+        // Now we know that there's a VLR `vlr` that is for VReg `vreg`. Update the inverse
+        // mapping accordingly. We know we are stepping sequentially through the VLR (index)
+        // space, so we'll never see the same VLRIx twice. Hence there's no need to check for
+        // dups when adding a VLR index to an existing binding for a VReg.
+        //
+        // If this array-indexing fails, it means the client's `.get_num_vregs()` function
+        // claims there are fewer virtual regs than we actually observe in the code it gave us.
+        // So it's a bug in the client.
+        vreg_to_vlrs_map[vreg.get_index()].push(vlrix);
+    }
+
+    // Same for the real live ranges.
+    let mut rreg_to_rlrs_map = vec![Vec::<RealRangeIx>::new(); univ.allocable];
+    for (rlr, n) in rlr_env.iter().zip(0..) {
+        let rlrix = RealRangeIx::new(n);
+        let rreg: RealReg = rlr.rreg;
+        // If this array-indexing fails, it means something has gone wrong with sanitisation of
+        // real registers -- that should ensure that we never see a real register with an index
+        // greater than `univ.allocable`. So it's a bug in the allocator's analysis phases.
+        rreg_to_rlrs_map[rreg.get_index()].push(rlrix);
+    }
+
+    RegToRangesMaps {
+        rreg_to_rlrs_map,
+        vreg_to_vlrs_map,
+    }
+}
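Stripped of the crate's typed index vectors, the inversion above has this generic shape (hypothetical helper, with plain usize indices standing in for VirtualRangeIx/VirtualReg and friends):

// range_to_reg[i] = register index owning range i; returns reg -> [range ixs].
fn invert_ranges(range_to_reg: &[usize], num_regs: usize) -> Vec<Vec<usize>> {
    let mut reg_to_ranges = vec![Vec::new(); num_regs];
    for (range_ix, &reg_ix) in range_to_reg.iter().enumerate() {
        // Ranges are visited in index order, so each inner Vec stays sorted
        // and duplicate-free, matching the set semantics noted above.
        reg_to_ranges[reg_ix].push(range_ix);
    }
    reg_to_ranges
}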
+
+// Collect info about registers (and optionally Virtual/RealRanges) that are
+// connected by moves:
+#[inline(never)]
+pub fn collect_move_info<F: Function>(
+    func: &F,
+    reg_vecs_and_bounds: &RegVecsAndBounds,
+    est_freqs: &TypedIxVec<BlockIx, u32>,
+    reg_to_ranges_maps: &RegToRangesMaps,
+    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+    fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
+    want_ranges: bool,
+) -> MoveInfo {
+    // Helper: find the RealRange or VirtualRange for a register at an InstPoint.
+    let find_range_for_reg = |pt: InstPoint, reg: Reg| {
+        if !want_ranges {
+            return RangeId::invalid_value();
+        }
+        if reg.is_real() {
+            for &rlrix in &reg_to_ranges_maps.rreg_to_rlrs_map[reg.get_index() as usize] {
+                if rlr_env[rlrix].sorted_frags.contains_pt(fenv, pt) {
+                    return RangeId::new_real(rlrix);
+                }
+            }
+        } else {
+            for &vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[reg.get_index() as usize] {
+                if vlr_env[vlrix].sorted_frags.contains_pt(pt) {
+                    return RangeId::new_virtual(vlrix);
+                }
+            }
+        }
+        RangeId::invalid_value()
+    };
+
+    let mut moves = Vec::<MoveInfoElem>::new();
+    for b in func.blocks() {
+        let block_eef = est_freqs[b];
+        for iix in func.block_insns(b) {
+            let insn = &func.get_insn(iix);
+            let im = func.is_move(insn);
+            match im {
+                None => {}
+                Some((wreg, reg)) => {
+                    let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
+                    // It might seem strange to assert that `defs_len` and/or
+                    // `uses_len` is <= 1 rather than == 1. The reason is
+                    // that either or even both registers might be ones which
+                    // are not available to the allocator. Hence they will
+                    // have been removed by the sanitisation machinery before
+                    // we get to this point. If either is missing, we
+                    // unfortunately can't coalesce the move away, and just
+                    // have to live with it.
+                    //
+                    // If any of the following five assertions fail, the
+                    // client's `is_move` is probably lying to us.
+                    assert!(iix_bounds.uses_len <= 1);
+                    assert!(iix_bounds.defs_len <= 1);
+                    assert!(iix_bounds.mods_len == 0);
+                    if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
+                        let reg_vecs = &reg_vecs_and_bounds.vecs;
+                        assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
+                        assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
+                        let dst = wreg.to_reg();
+                        let src = reg;
+                        let est_freq = block_eef;
+
+                        // Find the ranges for source and dest, if requested.
+                        let (src_range, dst_range) = (
+                            find_range_for_reg(InstPoint::new(iix, Point::Use), src),
+                            find_range_for_reg(InstPoint::new(iix, Point::Def), dst),
+                        );
+
+                        moves.push(MoveInfoElem {
+                            dst,
+                            dst_range,
+                            src,
+                            src_range,
+                            iix,
+                            est_freq,
+                        });
+                    }
+                }
+            }
+        }
+    }
+
+    MoveInfo { moves }
+}
@@ -4,14 +4,17 @@ use log::{debug, info};

 use crate::analysis_control_flow::{CFGInfo, InstIxToBlockIxMap};
 use crate::analysis_data_flow::{
-    calc_def_and_use, calc_livein_and_liveout, get_range_frags, get_sanitized_reg_uses_for_func,
-    merge_range_frags,
+    calc_def_and_use, calc_livein_and_liveout, collect_move_info, compute_reg_to_ranges_maps,
+    get_range_frags, get_sanitized_reg_uses_for_func, merge_range_frags,
 };
+use crate::analysis_reftypes::do_reftypes_analysis;
 use crate::data_structures::{
-    BlockIx, RangeFrag, RangeFragIx, RangeFragMetrics, RealRange, RealRangeIx, RealReg,
-    RealRegUniverse, RegVecsAndBounds, TypedIxVec, VirtualRange, VirtualRangeIx,
+    BlockIx, MoveInfo, RangeFrag, RangeFragIx, RangeFragMetrics, RealRange, RealRangeIx, RealReg,
+    RealRegUniverse, RegClass, RegToRangesMaps, RegVecsAndBounds, TypedIxVec, VirtualRange,
+    VirtualRangeIx, VirtualReg,
 };
 use crate::sparse_set::SparseSet;
 use crate::AlgorithmWithDefaults;
 use crate::Function;

 //=============================================================================

@@ -45,6 +48,10 @@ pub enum AnalysisError {
     /// Implementation limits exceeded. The incoming function is too big. It
     /// may contain at most 1 million basic blocks and 16 million instructions.
     ImplementationLimitsExceeded,
+
+    /// Currently LSRA can't generate stackmaps, but the client has requested LSRA *and*
+    /// stackmaps.
+    LSRACantDoStackmaps,
 }

 impl ToString for AnalysisError {

@@ -65,6 +72,9 @@ impl ToString for AnalysisError {
             AnalysisError::ImplementationLimitsExceeded => {
                 "implementation limits exceeded (more than 1 million blocks or 16 million insns)".to_string()
             }
+            AnalysisError::LSRACantDoStackmaps => {
+                "LSRA *and* stackmap creation requested; but this combination is not yet supported".to_string()
+            }
         }
     }
 }

@@ -87,12 +97,23 @@ pub struct AnalysisInfo {
     pub(crate) estimated_frequencies: TypedIxVec<BlockIx, u32>,
     /// Maps InstIxs to BlockIxs
     pub(crate) inst_to_block_map: InstIxToBlockIxMap,
+    /// Maps from RealRegs to sets of RealRanges and VirtualRegs to sets of VirtualRanges
+    /// (all operating on indices, not the actual objects). This is only generated in
+    /// situations where we need it, hence the `Option`.
+    pub(crate) reg_to_ranges_maps: Option<RegToRangesMaps>,
+    /// Information about registers connected by moves. This is only generated in situations
+    /// where we need it, hence the `Option`.
+    pub(crate) move_info: Option<MoveInfo>,
 }

 #[inline(never)]
 pub fn run_analysis<F: Function>(
     func: &F,
     reg_universe: &RealRegUniverse,
+    algorithm: AlgorithmWithDefaults,
+    client_wants_stackmaps: bool,
+    reftype_class: RegClass,
+    reftyped_vregs: &Vec<VirtualReg>, // as supplied by the client
 ) -> Result<AnalysisInfo, AnalysisError> {
     info!("run_analysis: begin");
     info!(

@@ -100,6 +121,12 @@ pub fn run_analysis<F: Function>(
         func.blocks().len(),
         func.insns().len()
     );
+
+    // LSRA can't do reftypes yet. That should have been checked at the top level already.
+    if client_wants_stackmaps {
+        assert!(algorithm != AlgorithmWithDefaults::LinearScan);
+    }

     info!("  run_analysis: begin control flow analysis");

     // First do control flow analysis. This is (relatively) simple. Note that

@@ -196,7 +223,9 @@ pub fn run_analysis<F: Function>(
         &liveout_sets_per_block,
     );

-    let (rlr_env, vlr_env) = merge_range_frags(
+    // These have to be mut because they may get changed below by the call to
+    // `do_reftypes_analysis`.
+    let (mut rlr_env, mut vlr_env) = merge_range_frags(
         &frag_ixs_per_reg,
         &frag_env,
         &frag_metrics_env,

@@ -226,7 +255,53 @@ pub fn run_analysis<F: Function>(
         n += 1;
     }

+    // Now a bit of auxiliary info collection, which isn't really either control- or data-flow
+    // analysis.
+
+    // For BT and/or reftypes, we'll also need the reg-to-ranges maps.
+    let reg_to_ranges_maps =
+        if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+            Some(compute_reg_to_ranges_maps(
+                func,
+                &reg_universe,
+                &rlr_env,
+                &vlr_env,
+            ))
+        } else {
+            None
+        };
+
+    // For BT and/or reftypes, we'll also need information about moves.
+    let move_info = if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+        Some(collect_move_info(
+            func,
+            &reg_vecs_and_bounds,
+            &estimated_frequencies,
+            reg_to_ranges_maps.as_ref().unwrap(),
+            &rlr_env,
+            &vlr_env,
+            &frag_env,
+            /* want_ranges = */ client_wants_stackmaps,
+        ))
+    } else {
+        None
+    };
+
     info!("  run_analysis: end liveness analysis");

+    if client_wants_stackmaps {
+        info!("  run_analysis: begin reftypes analysis");
+        do_reftypes_analysis(
+            &mut rlr_env,
+            &mut vlr_env,
+            reg_to_ranges_maps.as_ref().unwrap(), /* safe because of logic just above */
+            &move_info.as_ref().unwrap(),         /* ditto */
+            reftype_class,
+            reftyped_vregs,
+        );
+        info!("  run_analysis: end reftypes analysis");
+    }
+
     info!("run_analysis: end");

     Ok(AnalysisInfo {

@@ -237,5 +312,7 @@ pub fn run_analysis<F: Function>(
         range_metrics: frag_metrics_env,
         estimated_frequencies,
         inst_to_block_map,
+        reg_to_ranges_maps,
+        move_info,
     })
 }
@@ -0,0 +1,111 @@
+//! Performs a simple taint analysis, to find all live ranges that are reftyped.
+
+use crate::data_structures::{
+    MoveInfo, MoveInfoElem, RangeId, RealRange, RealRangeIx, RegClass, RegToRangesMaps, TypedIxVec,
+    VirtualRange, VirtualRangeIx, VirtualReg,
+};
+use crate::sparse_set::SparseSet;
+
+use log::debug;
+
+pub fn do_reftypes_analysis(
+    // From dataflow/liveness analysis. Modified by setting their `is_ref` bit.
+    rlr_env: &mut TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
+    // From dataflow analysis
+    reg_to_ranges_maps: &RegToRangesMaps,
+    move_info: &MoveInfo,
+    // As supplied by the client
+    reftype_class: RegClass,
+    reftyped_vregs: &Vec<VirtualReg>,
+) {
+    // The game here is: starting with `reftyped_vregs`, find *all* the VirtualRanges and
+    // RealRanges to which that refness can flow, via instructions which the client's `is_move`
+    // function considers to be moves.
+
+    // We have `move_info`, which tells us which regs (both real and virtual) are connected by
+    // moves. However, that's not directly useful -- we need to know which *ranges* are
+    // connected by moves. So first, convert `move_info` into a set of range-pairs.
+
+    let mut range_pairs = Vec::<(RangeId, RangeId)>::new(); // (DST, SRC)
+
+    debug!("do_reftypes_analysis starting");
+
+    for &MoveInfoElem {
+        dst,
+        src,
+        src_range,
+        dst_range,
+        iix,
+        ..
+    } in &move_info.moves
+    {
+        // Don't waste time processing moves which can't possibly be of reftyped values.
+        if dst.get_class() != reftype_class {
+            continue;
+        }
+        debug!(
+            "move from {:?} (range {:?}) to {:?} (range {:?}) at inst {:?}",
+            src, src_range, dst, dst_range, iix
+        );
+        range_pairs.push((dst_range, src_range));
+    }
+
+    // We now have in hand the range-pairs, which must be a superset of the moves that could
+    // possibly carry reftyped values. Now compute the starting set of reftyped virtual
+    // ranges. This can serve as the starting value for the following fixpoint iteration.
+
+    let mut reftyped_ranges = SparseSet::<RangeId>::empty();
+    for vreg in reftyped_vregs {
+        // If this fails, the client has been telling us that some virtual reg is reftyped, yet
+        // it doesn't belong to the class of regs that it claims can carry refs. So the client
+        // is buggy.
+        debug_assert!(vreg.get_class() == reftype_class);
+        for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[vreg.get_index()] {
+            debug!("range {:?} is reffy due to reffy vreg {:?}", vlrix, vreg);
+            reftyped_ranges.insert(RangeId::new_virtual(*vlrix));
+        }
+    }
+
+    // Now, finally, compute the fixpoint resulting from repeatedly mapping `reftyped_ranges`
+    // through `range_pairs`. XXXX this looks dangerously expensive .. reimplement.
+    //
+    // Later .. this is overkill. All that is needed is a DFS of the directed graph in which
+    // the nodes are the union of the RealRange(Ixs) and the VirtualRange(Ixs), and whose edges
+    // are exactly what we computed into `range_pairs`. This graph then needs to be searched
+    // from each root in `reftyped_ranges`.
+    loop {
+        let card_before = reftyped_ranges.card();
+
+        for (dst_lr_id, src_lr_id) in &range_pairs {
+            if reftyped_ranges.contains(*src_lr_id) {
+                debug!("reftyped range {:?} -> {:?}", src_lr_id, dst_lr_id);
+                reftyped_ranges.insert(*dst_lr_id);
+            }
+        }
+
+        let card_after = reftyped_ranges.card();
+        if card_after == card_before {
+            // Since we're only ever adding items to `reftyped_ranges`, and it has set
+            // semantics, checking that the cardinality is unchanged is an adequate check for
+            // having reached a (the minimal?) fixpoint.
+            break;
+        }
+    }
+
+    // Finally, annotate rlr_env/vlr_env with the results of the analysis. (That was the whole
+    // point!)
+    for lr_id in reftyped_ranges.iter() {
+        if lr_id.is_real() {
+            let rrange = &mut rlr_env[lr_id.to_real()];
+            debug_assert!(!rrange.is_ref);
+            debug!("  -> rrange {:?} is reffy", lr_id.to_real());
+            rrange.is_ref = true;
+        } else {
+            let vrange = &mut vlr_env[lr_id.to_virtual()];
+            debug_assert!(!vrange.is_ref);
+            debug!("  -> vrange {:?} is reffy", lr_id.to_virtual());
+            vrange.is_ref = true;
+        }
+    }
+}
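The comment in the new file already notes that the fixpoint loop is overkill: the propagation is a plain reachability problem over the move graph. A standalone sketch of the suggested DFS/worklist formulation, generic over the node type so it does not depend on regalloc's internal RangeId (hypothetical helper, not part of the crate):

use std::collections::{HashMap, HashSet};
use std::hash::Hash;

// `pairs` holds (dst, src) as in `range_pairs`; refness flows src -> dst.
fn propagate_refness<N: Copy + Eq + Hash>(seeds: &[N], pairs: &[(N, N)]) -> HashSet<N> {
    // Adjacency: src -> all dsts it moves into.
    let mut succs: HashMap<N, Vec<N>> = HashMap::new();
    for &(dst, src) in pairs {
        succs.entry(src).or_default().push(dst);
    }
    let mut reached: HashSet<N> = seeds.iter().copied().collect();
    let mut stack: Vec<N> = seeds.to_vec();
    while let Some(n) = stack.pop() {
        for &d in succs.get(&n).into_iter().flatten() {
            if reached.insert(d) {
                stack.push(d); // each range is visited at most once
            }
        }
    }
    reached
}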
@@ -818,6 +818,45 @@ impl<T: Clone + PartialOrd> AVLTree<T> {
         }
     }

+    // Find `item` in the tree, and replace it with `replacement`. `item` and `replacement`
+    // must compare equal per the comparison function `cmp`. Returns a bool indicating whether
+    // `item` was found (and hence, replaced). There's no comparison fast-path here
+    // (meaning, `cmp` is `&F` and not `Option<&F>`) only because so far there is no use case
+    // for it.
+    pub fn find_and_replace<F>(&mut self, item: T, replacement: T, cmp: &F) -> bool
+    where
+        F: Fn(T, T) -> Option<Ordering>,
+    {
+        let mut n = self.root;
+        loop {
+            if n == AVL_NULL {
+                return false;
+            }
+            let cmp_arg_left: T = item.clone();
+            let cmp_arg_right: T = self.pool[n as usize].item.clone();
+            match cmp(cmp_arg_left, cmp_arg_right) {
+                Some(Ordering::Less) => {
+                    n = self.pool[n as usize].left;
+                }
+                Some(Ordering::Greater) => {
+                    n = self.pool[n as usize].right;
+                }
+                Some(Ordering::Equal) => {
+                    // Do what we can to ensure the caller can't mess up the total ordering in
+                    // the tree. This is more restrictive than it needs to be, but loosening
+                    // it requires finding the largest item below `item` and the smallest one
+                    // above it, which is expensive.
+                    assert!(cmp(item, replacement.clone()) == Some(Ordering::Equal));
+                    self.pool[n as usize].item = replacement.clone();
+                    return true;
+                }
+                None => {
+                    panic!("AVLTree::find_and_replace: unordered elements in search!");
+                }
+            }
+        }
+    }
+
     // Determine whether an item is in the tree.
     // sewardj 2020Mar31: this is not used; I assume all users of the trees
     // do their own custom traversals. Remove #[cfg(test)] if any real uses
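What makes find_and_replace useful is that `cmp` may ignore part of T: two values can compare equal while carrying different payloads. A standalone model of that contract, with a plain slice standing in for the crate-internal tree (hypothetical helper):

use std::cmp::Ordering;

// Replace the first element comparing equal to `item`, enforcing the same
// precondition as the tree version: item and replacement must compare equal.
fn find_and_replace_model<T: Clone, F>(xs: &mut [T], item: T, replacement: T, cmp: &F) -> bool
where
    F: Fn(T, T) -> Option<Ordering>,
{
    assert!(cmp(item.clone(), replacement.clone()) == Some(Ordering::Equal));
    for slot in xs.iter_mut() {
        if cmp(item.clone(), slot.clone()) == Some(Ordering::Equal) {
            *slot = replacement; // ordering key unchanged, payload swapped
            return true;
        }
    }
    false
}

// Usage idea: order (key, payload) pairs by key only, then swap payloads:
//   let cmp = |a: (u32, char), b: (u32, char)| a.0.partial_cmp(&b.0);
//   find_and_replace_model(&mut v, (5, 'x'), (5, 'y'), &cmp);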
@@ -30,8 +30,8 @@ use log::{debug, info, log_enabled, Level};
use smallvec::{smallvec, SmallVec};

use crate::data_structures::{
    BlockIx, InstIx, InstPoint, RangeFrag, RangeFragIx, RealRange, RealRangeIx, RealReg,
    RealRegUniverse, Reg, RegVecsAndBounds, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
    InstIx, InstPoint, MoveInfo, MoveInfoElem, RangeFrag, RangeFragIx, RealRange, RealRangeIx,
    RealReg, RealRegUniverse, RegToRangesMaps, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
    VirtualReg,
};
use crate::union_find::{ToFromU32, UnionFind, UnionFindEquivClasses};
@@ -132,197 +132,113 @@ impl ToFromU32 for VirtualRangeIx {
#[inline(never)]
pub fn do_coalescing_analysis<F: Function>(
    func: &F,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    univ: &RealRegUniverse,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    est_freqs: &TypedIxVec<BlockIx, u32>,
    univ: &RealRegUniverse,
    reg_to_ranges_maps: &RegToRangesMaps,
    move_info: &MoveInfo,
) -> (
    TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>>,
    UnionFindEquivClasses<VirtualRangeIx>,
    TypedIxVec<InstIx, bool>,
    Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>>,
) {
    info!("");
    info!("do_coalescing_analysis: begin");
    // We have in hand the virtual live ranges. Each of these carries its
    // associated vreg. So in effect we have a VLR -> VReg mapping. We now
    // invert that, so as to generate a mapping from VRegs to their containing
    // VLRs.
    //
    // Note that multiple VLRs may map to the same VReg. So the inverse mapping
    // will actually be from VRegs to a set of VLRs. In most cases, we expect
    // the virtual-registerised-code given to this allocator to be derived from
    // SSA, in which case each VReg will have only one VLR. So in this case,
    // the cost of first creating the mapping, and then looking up all the VRegs
    // in moves in it, will have cost linear in the size of the input function.
    //
    // It would be convenient here to know how many VRegs there are ahead of
    // time, but until then we'll discover it dynamically.
    // NB re the SmallVec. That has set semantics (no dups)
    // FIXME use SmallVec for the VirtualRangeIxs. Or even a sparse set.
    let mut vreg_to_vlrs_map = Vec::</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>>::new();

    for (vlr, n) in vlr_env.iter().zip(0..) {
        let vlrix = VirtualRangeIx::new(n);
        let vreg: VirtualReg = vlr.vreg;
        // Now we know that there's a VLR `vlr` that is for VReg `vreg`. Update
        // the inverse mapping accordingly. That may involve resizing it, since
        // we have no idea of the order in which we will first encounter VRegs.
        // By contrast, we know we are stepping sequentially through the VLR
        // (index) space, and we'll never see the same VLRIx twice. So there's no
        // need to check for dups when adding a VLR index to an existing binding
        // for a VReg.
        let vreg_ix = vreg.get_index();
    // There follow four closures, which are used to find out whether a real or virtual reg has
    // a last use or first def at some instruction. This is the central activity of the
    // coalescing analysis -- finding move instructions that are the last def for the src reg
    // and the first def for the dst reg.

        while vreg_to_vlrs_map.len() <= vreg_ix {
            vreg_to_vlrs_map.push(smallvec![]); // This is very un-clever
        }

        vreg_to_vlrs_map[vreg_ix].push(vlrix);
    }
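
The loop above is the usual build-an-inverse-index pattern: grow the target table on demand, then push. A self-contained sketch of the same idea, with plain u32 indices standing in for VirtualReg and VirtualRangeIx (hypothetical simplification):

// Invert an owner-per-range table into an owner -> ranges table.
fn invert(owner_of_range: &[u32]) -> Vec<Vec<u32>> {
    let mut owner_to_ranges: Vec<Vec<u32>> = Vec::new();
    for (range_ix, &owner) in owner_of_range.iter().enumerate() {
        // Owners may appear in any order, so grow the table on demand.
        while owner_to_ranges.len() <= owner as usize {
            owner_to_ranges.push(Vec::new());
        }
        // `range_ix` increases monotonically, so no duplicate check is needed.
        owner_to_ranges[owner as usize].push(range_ix as u32);
    }
    owner_to_ranges
}

fn main() {
    // Ranges 0 and 2 belong to vreg 1; range 1 belongs to vreg 0.
    let map = invert(&[1, 0, 1]);
    assert_eq!(map[1], vec![0, 2]);
}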

    // Same for the real live ranges
    let mut rreg_to_rlrs_map = Vec::</*rreg index,*/ Vec<RealRangeIx>>::new();

    for (rlr, n) in rlr_env.iter().zip(0..) {
        let rlrix = RealRangeIx::new(n);
        let rreg: RealReg = rlr.rreg;
        let rreg_ix = rreg.get_index();

        while rreg_to_rlrs_map.len() <= rreg_ix {
            rreg_to_rlrs_map.push(vec![]); // This is very un-clever
        }

        rreg_to_rlrs_map[rreg_ix].push(rlrix);
    }

    // And what do we got?
    //for (vlrixs, vreg) in vreg_to_vlrs_map.iter().zip(0..) {
    //    println!("QQQQ vreg v{:?} -> vlrixs {:?}", vreg, vlrixs);
    //}
    //for (rlrixs, rreg) in rreg_to_rlrs_map.iter().zip(0..) {
    //    println!("QQQQ rreg r{:?} -> rlrixs {:?}", rreg, rlrixs);
    //}

    // Range end checks for VRegs. The XX means either "Last use" or "First
    // def", depending on the boolean parameter.
    let doesVRegHaveXXat
        // `xxIsLastUse` is true means "XX is last use"
        // `xxIsLastUse` is false means "XX is first def"
        = |xxIsLastUse: bool, vreg: VirtualReg, iix: InstIx|
         -> Option<VirtualRangeIx> {
            let vreg_no = vreg.get_index();
            let vlrixs = &vreg_to_vlrs_map[vreg_no];
            for vlrix in vlrixs {
                for frag in &vlr_env[*vlrix].sorted_frags.frags {
                    if xxIsLastUse {
                        // We're checking to see if `vreg` has a last use in this block
                        // (well, technically, a fragment end in the block; we don't care if
                        // it is later redefined in the same block) .. anyway ..
                        // We're checking to see if `vreg` has a last use in this block
                        // at `iix`.u
                        if frag.last == InstPoint::new_use(iix) {
                            return Some(*vlrix);
                        }
                    } else {
                        // We're checking to see if `vreg` has a first def in this block
                        // at `iix`.d
                        if frag.first == InstPoint::new_def(iix) {
                            return Some(*vlrix);
                        }
                    }
                }
            }
            None
        };

    // Range end checks for RRegs. XX has same meaning as above.
    let doesRRegHaveXXat
        // `xxIsLastUse` is true means "XX is last use"
        // `xxIsLastUse` is false means "XX is first def"
        = |xxIsLastUse: bool, rreg: RealReg, iix: InstIx|
         -> Option<RealRangeIx> {
            let rreg_no = rreg.get_index();
            let rlrixs = &rreg_to_rlrs_map[rreg_no];
            for rlrix in rlrixs {
                let frags = &rlr_env[*rlrix].sorted_frags;
                for fix in &frags.frag_ixs {
                    let frag = &frag_env[*fix];
                    if xxIsLastUse {
                        // We're checking to see if `rreg` has a last use in this block
                        // at `iix`.u
                        if frag.last == InstPoint::new_use(iix) {
                            return Some(*rlrix);
                        }
                    } else {
                        // We're checking to see if `rreg` has a first def in this block
                        // at `iix`.d
                        if frag.first == InstPoint::new_def(iix) {
                            return Some(*rlrix);
                        }
                    }
                }
            }
            None
        };

    // Make up a vector of registers that are connected by moves:
    //
    // (dstReg, srcReg, transferring insn, estimated execution count of the
    // containing block)
    //
    // This can contain real-to-real moves, which we obviously can't do anything
    // about. We'll remove them in the next pass.
    let mut connectedByMoves = Vec::<(Reg, Reg, InstIx, u32)>::new();
    for b in func.blocks() {
        let block_eef = est_freqs[b];
        for iix in func.block_insns(b) {
            let insn = &func.get_insn(iix);
            let im = func.is_move(insn);
            match im {
                None => {}
                Some((wreg, reg)) => {
                    let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
                    // It might seem strange to assert that `defs_len` and/or
                    // `uses_len` is <= 1 rather than == 1. The reason is
                    // that either or even both registers might be ones which
                    // are not available to the allocator. Hence they will
                    // have been removed by the sanitisation machinery before
                    // we get to this point. If either is missing, we
                    // unfortunately can't coalesce the move away, and just
                    // have to live with it.
                    //
                    // If any of the following five assertions fail, the
                    // client's `is_move` is probably lying to us.
                    assert!(iix_bounds.uses_len <= 1);
                    assert!(iix_bounds.defs_len <= 1);
                    assert!(iix_bounds.mods_len == 0);
                    if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
                        let reg_vecs = &reg_vecs_and_bounds.vecs;
                        assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
                        assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
                        connectedByMoves.push((wreg.to_reg(), reg, iix, block_eef));
                    }
    // Range checks for VRegs -- last use.
    let doesVRegHaveLastUseAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
        let vreg_no = vreg.get_index();
        let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
        for vlrix in vlrixs {
            for frag in &vlr_env[*vlrix].sorted_frags.frags {
                // We're checking to see if `vreg` has a last use in this block
                // (well, technically, a fragment end in the block; we don't care if
                // it is later redefined in the same block) .. anyway ..
                // We're checking to see if `vreg` has a last use in this block
                // at `iix`.u
                if frag.last == InstPoint::new_use(iix) {
                    return Some(*vlrix);
                }
            }
        }
    }
        None
    };

    // XX these sub-vectors could contain duplicates, I suppose, for example if
    // there are two identical copy insns at different points on the "boundary"
    // for some VLR. I don't think it matters though since we're going to rank
    // the hints by strength and then choose at most one.
    // Range checks for VRegs -- first def.
    let doesVRegHaveFirstDefAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
        let vreg_no = vreg.get_index();
        let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
        for vlrix in vlrixs {
            for frag in &vlr_env[*vlrix].sorted_frags.frags {
                // We're checking to see if `vreg` has a first def in this block at `iix`.d
                if frag.first == InstPoint::new_def(iix) {
                    return Some(*vlrix);
                }
            }
        }
        None
    };

    // Range checks for RRegs -- last use.
    let doesRRegHaveLastUseAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
        let rreg_no = rreg.get_index();
        let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
        for rlrix in rlrixs {
            let frags = &rlr_env[*rlrix].sorted_frags;
            for fix in &frags.frag_ixs {
                let frag = &frag_env[*fix];
                // We're checking to see if `rreg` has a last use in this block at `iix`.u
                if frag.last == InstPoint::new_use(iix) {
                    return Some(*rlrix);
                }
            }
        }
        None
    };

    // Range checks for RRegs -- first def.
    let doesRRegHaveFirstDefAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
        let rreg_no = rreg.get_index();
        let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
        for rlrix in rlrixs {
            let frags = &rlr_env[*rlrix].sorted_frags;
            for fix in &frags.frag_ixs {
                let frag = &frag_env[*fix];
                // We're checking to see if `rreg` has a first def in this block at `iix`.d
                if frag.first == InstPoint::new_def(iix) {
                    return Some(*rlrix);
                }
            }
        }
        None
    };
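
The `iix`.u / `iix`.d notation in the comments above refers to sub-instruction points. A minimal sketch of why "fragment ends at the use point" means "last use", assuming the four-way Reload < Use < Def < Spill subdivision of each instruction used by this allocator:

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Pt {
    Reload, // ".r"
    Use,    // ".u"
    Def,    // ".d"
    Spill,  // ".s"
}

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Ip {
    iix: u32,
    pt: Pt,
}

fn main() {
    let use_pt = Ip { iix: 7, pt: Pt::Use }; // "7.u"
    let def_pt = Ip { iix: 7, pt: Pt::Def }; // "7.d"
    // Within one instruction the use point precedes the def point, so a
    // fragment ending at `iix`.u is a last use, and one starting at `iix`.d
    // is a first def.
    assert!(use_pt < def_pt);
}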

    // RETURNED TO CALLER
    // Hints for each VirtualRange. Note that the SmallVecs could contain duplicates, I
    // suppose, for example if there are two identical copy insns at different points on the
    // "boundary" for some VLR. I don't think it matters though since we're going to rank the
    // hints by strength and then choose at most one.
    let mut hints = TypedIxVec::<VirtualRangeIx, SmallVec<[Hint; 8]>>::new();
    hints.resize(vlr_env.len(), smallvec![]);

    // RETURNED TO CALLER
    // A vector that simply records which insns are v-to-v boundary moves, as established by the
    // analysis below. This info is collected here because (1) the caller (BT) needs to have it
    // and (2) this is the first point at which we can efficiently compute it.
    let mut is_vv_boundary_move = TypedIxVec::<InstIx, bool>::new();
    is_vv_boundary_move.resize(func.insns().len() as u32, false);

    // RETURNED TO CALLER (after finalisation)
    // The virtual-to-virtual equivalence classes we're collecting.
    let mut vlrEquivClassesUF = UnionFind::<VirtualRangeIx>::new(vlr_env.len() as usize);

    // Not returned to caller; for use only in this function.
    // A list of `VirtualRange`s for which the `total_cost` (hence also their
    // `spill_cost`) should be adjusted downwards by the supplied `u32`. We
    // can't do this directly in the loop below due to borrowing constraints,
@@ -330,18 +246,25 @@ pub fn do_coalescing_analysis<F: Function>(
    // loop.
    let mut decVLRcosts = Vec::<(VirtualRangeIx, VirtualRangeIx, u32)>::new();

    for (rDst, rSrc, iix, block_eef) in connectedByMoves {
    for MoveInfoElem {
        dst,
        src,
        iix,
        est_freq,
        ..
    } in &move_info.moves
    {
        debug!(
            "QQQQ connectedByMoves {:?} {:?} <- {:?} (block_eef {})",
            iix, rDst, rSrc, block_eef
            "connected by moves: {:?} {:?} <- {:?} (est_freq {})",
            iix, dst, src, est_freq
        );
        match (rDst.is_virtual(), rSrc.is_virtual()) {
        match (dst.is_virtual(), src.is_virtual()) {
            (true, true) => {
                // Check for a V <- V hint.
                let rSrcV = rSrc.to_virtual_reg();
                let rDstV = rDst.to_virtual_reg();
                let mb_vlrixSrc = doesVRegHaveXXat(/*xxIsLastUse=*/ true, rSrcV, iix);
                let mb_vlrixDst = doesVRegHaveXXat(/*xxIsLastUse=*/ false, rDstV, iix);
                let srcV = src.to_virtual_reg();
                let dstV = dst.to_virtual_reg();
                let mb_vlrixSrc = doesVRegHaveLastUseAt(srcV, *iix);
                let mb_vlrixDst = doesVRegHaveFirstDefAt(dstV, *iix);
                if mb_vlrixSrc.is_some() && mb_vlrixDst.is_some() {
                    let vlrixSrc = mb_vlrixSrc.unwrap();
                    let vlrixDst = mb_vlrixDst.unwrap();
@@ -353,39 +276,39 @@ pub fn do_coalescing_analysis<F: Function>(
                    // Add hints for both VLRs, since we don't know which one will
                    // assign first. Indeed, a VLR may be assigned and un-assigned
                    // arbitrarily many times.
                    hints[vlrixSrc].push(Hint::SameAs(vlrixDst, block_eef));
                    hints[vlrixDst].push(Hint::SameAs(vlrixSrc, block_eef));
                    hints[vlrixSrc].push(Hint::SameAs(vlrixDst, *est_freq));
                    hints[vlrixDst].push(Hint::SameAs(vlrixSrc, *est_freq));
                    vlrEquivClassesUF.union(vlrixDst, vlrixSrc);
                    is_vv_boundary_move[iix] = true;
                    is_vv_boundary_move[*iix] = true;
                    // Reduce the total cost, and hence the spill cost, of
                    // both `vlrixSrc` and `vlrixDst`. This is so as to reduce to
                    // zero, the cost of a VLR whose only instructions are its
                    // v-v boundary copies.
                    debug!("QQQQ reduce cost of {:?} and {:?}", vlrixSrc, vlrixDst);
                    decVLRcosts.push((vlrixSrc, vlrixDst, 1 * block_eef));
                    debug!("reduce cost of {:?} and {:?}", vlrixSrc, vlrixDst);
                    decVLRcosts.push((vlrixSrc, vlrixDst, 1 * est_freq));
                }
            }
            }
            (true, false) => {
                // Check for a V <- R hint.
                let rSrcR = rSrc.to_real_reg();
                let rDstV = rDst.to_virtual_reg();
                let mb_rlrSrc = doesRRegHaveXXat(/*xxIsLastUse=*/ true, rSrcR, iix);
                let mb_vlrDst = doesVRegHaveXXat(/*xxIsLastUse=*/ false, rDstV, iix);
                let srcR = src.to_real_reg();
                let dstV = dst.to_virtual_reg();
                let mb_rlrSrc = doesRRegHaveLastUseAt(srcR, *iix);
                let mb_vlrDst = doesVRegHaveFirstDefAt(dstV, *iix);
                if mb_rlrSrc.is_some() && mb_vlrDst.is_some() {
                    let vlrDst = mb_vlrDst.unwrap();
                    hints[vlrDst].push(Hint::Exactly(rSrcR, block_eef));
                    hints[vlrDst].push(Hint::Exactly(srcR, *est_freq));
                }
            }
            (false, true) => {
                // Check for a R <- V hint.
                let rSrcV = rSrc.to_virtual_reg();
                let rDstR = rDst.to_real_reg();
                let mb_vlrSrc = doesVRegHaveXXat(/*xxIsLastUse=*/ true, rSrcV, iix);
                let mb_rlrDst = doesRRegHaveXXat(/*xxIsLastUse=*/ false, rDstR, iix);
                let srcV = src.to_virtual_reg();
                let dstR = dst.to_real_reg();
                let mb_vlrSrc = doesVRegHaveLastUseAt(srcV, *iix);
                let mb_rlrDst = doesRRegHaveFirstDefAt(dstR, *iix);
                if mb_vlrSrc.is_some() && mb_rlrDst.is_some() {
                    let vlrSrc = mb_vlrSrc.unwrap();
                    hints[vlrSrc].push(Hint::Exactly(rDstR, block_eef));
                    hints[vlrSrc].push(Hint::Exactly(dstR, *est_freq));
                }
            }
            (false, false) => {
@@ -468,10 +391,5 @@ pub fn do_coalescing_analysis<F: Function>(
    info!("do_coalescing_analysis: end");
    info!("");

    (
        hints,
        vlrEquivClasses,
        is_vv_boundary_move,
        vreg_to_vlrs_map,
    )
    (hints, vlrEquivClasses, is_vv_boundary_move)
}
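
For reference, the equivalence classes collected via `vlrEquivClassesUF.union` above are plain union-find. A toy version, with usize indices as a hypothetical stand-in for the crate's UnionFind over VirtualRangeIx:

struct Uf {
    parent: Vec<usize>,
}

impl Uf {
    fn new(n: usize) -> Self {
        Uf { parent: (0..n).collect() }
    }
    fn find(&mut self, x: usize) -> usize {
        if self.parent[x] != x {
            let root = self.find(self.parent[x]);
            self.parent[x] = root; // path compression
        }
        self.parent[x]
    }
    fn union(&mut self, a: usize, b: usize) {
        let (ra, rb) = (self.find(a), self.find(b));
        if ra != rb {
            self.parent[ra] = rb;
        }
    }
}

fn main() {
    // Three VLRs; moves connect 0 <-> 1 and 1 <-> 2, so all three coalesce.
    let mut uf = Uf::new(3);
    uf.union(0, 1);
    uf.union(1, 2);
    assert_eq!(uf.find(0), uf.find(2));
}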

@@ -6,61 +6,62 @@
use std::cmp::Ordering;
use std::fmt;

use crate::avl_tree::AVLTree;
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::data_structures::{
    cmp_range_frags, RangeFrag, RangeFragIx, SortedRangeFragIxs, SortedRangeFrags, TypedIxVec,
    VirtualRangeIx,
    cmp_range_frags, InstPoint, RangeFrag, RangeFragIx, RangeId, SortedRangeFragIxs,
    SortedRangeFrags, TypedIxVec,
};

//=============================================================================
// Per-real-register commitment maps
//

// Something that pairs a fragment index with the index of the virtual range
// to which this fragment conceptually "belongs", at least for the purposes of
// this commitment map. Alternatively, the `vlrix` field may be None, which
// indicates that the associated fragment belongs to a real-reg live range and
// is therefore non-evictable.
// Something that pairs a fragment index with the identity of the virtual or real range to which
// this fragment conceptually "belongs", at least for the purposes of this commitment map. If
// the `lr_id` field denotes a real range, the associated fragment belongs to a real-reg live
// range and is therefore non-evictable. The identity of the range is necessary because:
//
// (A fragment merely denotes a sequence of instruction (points), but within
// the context of a commitment map for a real register, obviously any
// particular fragment can't be part of two different virtual live ranges.)
// * for VirtualRanges, (1) we may need to evict the mapping, so we will need to get hold of the
//   VirtualRange, so that we have all fragments of the VirtualRange to hand, and (2) if the
//   client requires stackmaps, we need to look at the VirtualRange to see if it is reftyped.
//
// Note that we don't intend to actually use the PartialOrd methods for
// FIxAndVLRix. However, they need to exist since we want to construct an
// AVLTree<FIxAndVLRix>, and that requires PartialOrd for its element type.
// For working with such trees we will supply our own comparison function;
// hence PartialOrd here serves only to placate the typechecker. It should
// never actually be used.
// * for RealRanges, only (2) applies; (1) is irrelevant since RealRange assignments are
//   non-evictable.
//
// (A fragment merely denotes a sequence of instruction (points), but within the context of a
// commitment map for a real register, obviously any particular fragment can't be part of two
// different virtual live ranges.)
//
// Note that we don't intend to actually use the PartialOrd methods for RangeFragAndRangeId.
// However, they need to exist since we want to construct an AVLTree<RangeFragAndRangeId>, and
// that requires PartialOrd for its element type. For working with such trees we will supply
// our own comparison function; hence PartialOrd here serves only to placate the typechecker.
// It should never actually be used.
#[derive(Clone)]
pub struct RangeFragAndVLRIx {
pub struct RangeFragAndRangeId {
    pub frag: RangeFrag,
    pub mb_vlrix: Option<VirtualRangeIx>,
    pub id: RangeId,
}
impl RangeFragAndVLRIx {
    fn new(frag: RangeFrag, mb_vlrix: Option<VirtualRangeIx>) -> Self {
        Self { frag, mb_vlrix }
impl RangeFragAndRangeId {
    fn new(frag: RangeFrag, id: RangeId) -> Self {
        Self { frag, id }
    }
}
impl PartialEq for RangeFragAndVLRIx {
impl PartialEq for RangeFragAndRangeId {
    fn eq(&self, _other: &Self) -> bool {
        // See comments above.
        panic!("impl PartialEq for RangeFragAndVLRIx: should never be used");
        panic!("impl PartialEq for RangeFragAndRangeId: should never be used");
    }
}
impl PartialOrd for RangeFragAndVLRIx {
impl PartialOrd for RangeFragAndRangeId {
    fn partial_cmp(&self, _other: &Self) -> Option<Ordering> {
        // See comments above.
        panic!("impl PartialOrd for RangeFragAndVLRIx: should never be used");
        panic!("impl PartialOrd for RangeFragAndRangeId: should never be used");
    }
}
impl fmt::Debug for RangeFragAndVLRIx {
impl fmt::Debug for RangeFragAndRangeId {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let vlrix_string = match self.mb_vlrix {
            None => "NONE".to_string(),
            Some(vlrix) => format!("{:?}", vlrix),
        };
        write!(fmt, "(FnV {:?} {})", self.frag, vlrix_string)
        write!(fmt, "(FnV {:?} {:?})", self.frag, self.id)
    }
}
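
The panicking PartialEq/PartialOrd impls above are exactly the "placate the typechecker" pattern the comment describes: the trait bound is satisfied syntactically, while all real ordering goes through an explicitly supplied comparison function. The same discipline sketched against a sorted Vec (`insert_sorted` is a hypothetical helper, not part of this crate):

use std::cmp::Ordering;

#[derive(Clone)]
struct Elem {
    key: u32,
    payload: u32,
}

// The bound is satisfied, but any accidental use panics loudly.
impl PartialEq for Elem {
    fn eq(&self, _: &Self) -> bool {
        panic!("Elem::eq: should never be used");
    }
}
impl PartialOrd for Elem {
    fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
        panic!("Elem::partial_cmp: should never be used");
    }
}

// All real ordering goes through an explicit comparator.
fn insert_sorted(v: &mut Vec<Elem>, e: Elem, cmp: &dyn Fn(&Elem, &Elem) -> Ordering) {
    let pos = v.partition_point(|x| cmp(x, &e) == Ordering::Less);
    v.insert(pos, e);
}

fn main() {
    let by_key = |a: &Elem, b: &Elem| a.key.cmp(&b.key);
    let mut v = Vec::new();
    insert_sorted(&mut v, Elem { key: 2, payload: 20 }, &by_key);
    insert_sorted(&mut v, Elem { key: 1, payload: 10 }, &by_key);
    assert_eq!(v[0].payload, 10); // sorted by key; payloads untouched
}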

@@ -70,13 +71,10 @@ impl fmt::Debug for RangeFragAndVLRIx {

// This indicates the current set of fragments to which some real register is
// currently "committed". The fragments *must* be non-overlapping. Hence
// they form a total order, and so they must appear in the vector sorted by
// that order.
//
// Overall this is identical to SortedRangeFragIxs, except extended so that
// each FragIx is tagged with an Option<VirtualRangeIx>.
// they form a total order, and so we may validly build an AVL tree of them.

pub struct CommitmentMap {
    pub tree: AVLTree<RangeFragAndVLRIx>,
    pub tree: AVLTree<RangeFragAndRangeId>,
}
impl fmt::Debug for CommitmentMap {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
@@ -88,27 +86,20 @@ impl fmt::Debug for CommitmentMap {
impl CommitmentMap {
    pub fn new() -> Self {
        // The AVL tree constructor needs a default value for the elements. It
        // will never be used. The not-present index value will show as
        // will never be used. The RangeId index value will show as
        // obviously bogus if we ever try to "dereference" any part of it.
        let dflt = RangeFragAndVLRIx::new(
            RangeFrag::invalid_value(),
            Some(VirtualRangeIx::invalid_value()),
        );
        let dflt = RangeFragAndRangeId::new(RangeFrag::invalid_value(), RangeId::invalid_value());
        Self {
            tree: AVLTree::<RangeFragAndVLRIx>::new(dflt),
            tree: AVLTree::<RangeFragAndRangeId>::new(dflt),
        }
    }

    pub fn add(
        &mut self,
        to_add_frags: &SortedRangeFrags,
        to_add_mb_vlrix: Option<VirtualRangeIx>,
    ) {
    pub fn add(&mut self, to_add_frags: &SortedRangeFrags, to_add_lr_id: RangeId) {
        for frag in &to_add_frags.frags {
            let to_add = RangeFragAndVLRIx::new(frag.clone(), to_add_mb_vlrix);
            let to_add = RangeFragAndRangeId::new(frag.clone(), to_add_lr_id);
            let added = self.tree.insert(
                to_add,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -121,14 +112,14 @@ impl CommitmentMap {
    pub fn add_indirect(
        &mut self,
        to_add_frags: &SortedRangeFragIxs,
        to_add_mb_vlrix: Option<VirtualRangeIx>,
        to_add_lr_id: RangeId,
        frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    ) {
        for fix in &to_add_frags.frag_ixs {
            let to_add = RangeFragAndVLRIx::new(frag_env[*fix].clone(), to_add_mb_vlrix);
            let to_add = RangeFragAndRangeId::new(frag_env[*fix].clone(), to_add_lr_id);
            let added = self.tree.insert(
                to_add,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -140,12 +131,12 @@ impl CommitmentMap {

    pub fn del(&mut self, to_del_frags: &SortedRangeFrags) {
        for frag in &to_del_frags.frags {
            // re None: we don't care what the VLRIx is, since we're deleting by
            // RangeFrags alone.
            let to_del = RangeFragAndVLRIx::new(frag.clone(), None);
            // re RangeId::invalid_value(): we don't care what the RangeId is, since we're
            // deleting by RangeFrags alone.
            let to_del = RangeFragAndRangeId::new(frag.clone(), RangeId::invalid_value());
            let deleted = self.tree.delete(
                to_del,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -154,4 +145,26 @@ impl CommitmentMap {
            assert!(deleted);
        }
    }

    // Find the RangeId for the RangeFrag that overlaps `pt`, if one exists.
    // This is conceptually equivalent to LogicalSpillSlot::get_refness_at_inst_point.
    pub fn lookup_inst_point(&self, pt: InstPoint) -> Option<RangeId> {
        let mut root = self.tree.root;
        while root != AVL_NULL {
            let root_node = &self.tree.pool[root as usize];
            let root_item = &root_node.item;
            if pt < root_item.frag.first {
                // `pt` is to the left of the `root`. So there's no
                // overlap with `root`. Continue by inspecting the left subtree.
                root = root_node.left;
            } else if root_item.frag.last < pt {
                // Ditto for the right subtree.
                root = root_node.right;
            } else {
                // `pt` overlaps the `root`, so we have what we want.
                return Some(root_item.id);
            }
        }
        None
    }
}
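
`lookup_inst_point` above works only because committed fragments never overlap: at every node, "left of", "right of" and "contains" are mutually exclusive, so the walk discards half the tree per step. The same query against a sorted Vec with binary search, as an assumed simplification of the AVL walk:

// Inclusive, non-overlapping intervals, sorted by `first`.
struct Frag {
    first: u32,
    last: u32,
}

fn lookup(sorted_frags: &[Frag], pt: u32) -> Option<usize> {
    let (mut lo, mut hi) = (0usize, sorted_frags.len());
    while lo < hi {
        let mid = (lo + hi) / 2;
        let f = &sorted_frags[mid];
        if pt < f.first {
            hi = mid; // `pt` is to the left; look in the left half
        } else if f.last < pt {
            lo = mid + 1; // ditto for the right half
        } else {
            return Some(mid); // `pt` overlaps this fragment
        }
    }
    None
}

fn main() {
    let frags = vec![Frag { first: 0, last: 4 }, Frag { first: 10, last: 12 }];
    assert_eq!(lookup(&frags, 11), Some(1));
    assert_eq!(lookup(&frags, 7), None);
}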

@@ -12,18 +12,21 @@ use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_
use crate::analysis_main::{run_analysis, AnalysisInfo};
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndVLRIx};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndRangeId};
use crate::bt_spillslot_allocator::SpillSlotAllocator;
use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
use crate::data_structures::{
    BlockIx, InstIx, InstPoint, Point, RangeFrag, RangeFragIx, RealRange, RealReg, RealRegUniverse,
    Reg, RegVecBounds, RegVecs, Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec,
    VirtualRange, VirtualRangeIx, VirtualReg, Writable,
    BlockIx, InstIx, InstPoint, Map, Point, RangeFrag, RangeFragIx, RangeId, RealRange,
    RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegVecBounds, RegVecs, RegVecsAndBounds,
    Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec, VirtualRange, VirtualRangeIx,
    VirtualReg, Writable,
};
use crate::inst_stream::{
    edit_inst_stream, ExtPoint, InstExtPoint, InstToInsert, InstToInsertAndExtPoint,
};
use crate::inst_stream::{edit_inst_stream, InstToInsert, InstToInsertAndPoint};
use crate::sparse_set::SparseSetU;
use crate::union_find::UnionFindEquivClasses;
use crate::{Function, RegAllocError, RegAllocResult};
use crate::{AlgorithmWithDefaults, Function, RegAllocError, RegAllocResult, StackmapRequestInfo};

#[derive(Clone)]
pub struct BacktrackingOptions {
@@ -75,12 +78,21 @@ impl PerRealReg {
    }

    #[inline(never)]
    fn add_RealRange(&mut self, to_add: &RealRange, frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) {
        // Commit this register to `to_add`, irrevocably. Don't add it to
        // `vlrixs_assigned` since we will never want to later evict the
        // assignment.
        self.committed
            .add_indirect(&to_add.sorted_frags, None, frag_env);
    fn add_RealRange(
        &mut self,
        to_add_rlrix: RealRangeIx,
        rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
        frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    ) {
        // Commit this register to `to_add`, irrevocably. Don't add it to `vlrixs_assigned`
        // since we will never want to later evict the assignment. (Also, from a types point of
        // view that would be impossible.)
        let to_add_rlr = &rlr_env[to_add_rlrix];
        self.committed.add_indirect(
            &to_add_rlr.sorted_frags,
            RangeId::new_real(to_add_rlrix),
            frag_env,
        );
    }
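
`RangeId::new_real` / `RangeId::new_virtual` fold the real-vs-virtual distinction into a single index type, replacing the previous Option<VirtualRangeIx>. A sketch of one plausible encoding, using the top bit as the tag -- an assumption for illustration, the crate's actual encoding may differ:

#[derive(Clone, Copy, PartialEq, Debug)]
struct RangeIdSketch(u32);

impl RangeIdSketch {
    const REAL_BIT: u32 = 1 << 31;
    fn new_real(ix: u32) -> Self {
        RangeIdSketch(ix | Self::REAL_BIT)
    }
    fn new_virtual(ix: u32) -> Self {
        RangeIdSketch(ix)
    }
    fn is_real(self) -> bool {
        self.0 & Self::REAL_BIT != 0
    }
    fn index(self) -> u32 {
        self.0 & !Self::REAL_BIT
    }
}

fn main() {
    let r = RangeIdSketch::new_real(7);
    let v = RangeIdSketch::new_virtual(7);
    assert!(r.is_real() && !v.is_real());
    assert_eq!(r.index(), v.index());
}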

    #[inline(never)]
@@ -91,7 +103,7 @@ impl PerRealReg {
    ) {
        let to_add_vlr = &vlr_env[to_add_vlrix];
        self.committed
            .add(&to_add_vlr.sorted_frags, Some(to_add_vlrix));
            .add(&to_add_vlr.sorted_frags, RangeId::new_virtual(to_add_vlrix));
        assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
        self.vlrixs_assigned.insert(to_add_vlrix);
    }
@@ -103,6 +115,8 @@ impl PerRealReg {
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        // Remove it from `vlrixs_assigned`
        // FIXME 2020June18: we could do this more efficiently by inspecting
        // the return value from `delete`.
        if self.vlrixs_assigned.contains(to_del_vlrix) {
            self.vlrixs_assigned.delete(to_del_vlrix);
        } else {
@@ -130,7 +144,7 @@ fn search_commitment_tree<IsAllowedToEvict>(
    running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
    running_cost: &mut SpillCost,
    // The tree to search.
    tree: &AVLTree<RangeFragAndVLRIx>,
    tree: &AVLTree<RangeFragAndRangeId>,
    // The RangeFrag we want to accommodate.
    pair_frag: &RangeFrag,
    spill_cost_budget: &SpillCost,
@@ -156,14 +170,14 @@ where
    // Let's first consider the current node. If we need it but it's not
    // evictable, we might as well stop now.
    if overlaps_curr {
        // This frag has no associated VirtualRangeIx, so it is part of a
        // RealRange, and hence not evictable.
        if curr_node_item.mb_vlrix.is_none() {
        // This frag is committed to a real range, not a virtual one, and hence is not
        // evictable.
        if curr_node_item.id.is_real() {
            return false;
        }
        // Maybe this one is a spill range, in which case, it can't be
        // evicted.
        let vlrix_to_evict = curr_node_item.mb_vlrix.unwrap();
        let vlrix_to_evict = curr_node_item.id.to_virtual();
        let vlr_to_evict = &vlr_env[vlrix_to_evict];
        if vlr_to_evict.spill_cost.is_infinite() {
            return false;
@@ -368,6 +382,180 @@ fn print_RA_state(
    debug!(">>>>");
}

//=============================================================================
// Reftype/stackmap support

// This creates the artefacts for a safepoint/stackmap at some insn `iix`: the set of reftyped
// spill slots, the spills to be placed at `iix.r` (yes, you read that right) and the reloads to
// be placed at `iix.s`.
//
// This consults:
//
// * the commitment maps, to figure out which real registers are live and reftyped at `iix.u`.
//
// * the spillslot allocator, to figure out which spill slots are live and reftyped at `iix.u`.
//
// This may fail, meaning the request is in some way nonsensical; failure is propagated upwards.

fn get_stackmap_artefacts_at(
    spill_slot_allocator: &mut SpillSlotAllocator,
    univ: &RealRegUniverse,
    reftype_class: RegClass,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    per_real_reg: &Vec<PerRealReg>,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    iix: InstIx,
) -> Result<(Vec<InstToInsert>, Vec<InstToInsert>, Vec<SpillSlot>), RegAllocError> {
    // From a code generation perspective, what we need to compute is:
    //
    // * Sbefore: real regs that are live at `iix.u`, that are reftypes
    //
    // * Safter: Sbefore - real regs written by `iix`
    //
    // Then:
    //
    // * for r in Sbefore . add "spill r" at `iix.r` *after* all the reloads that are already
    //   there
    //
    // * for r in Safter . add "reload r" at `iix.s` *before* all the spills that are already
    //   there
    //
    // Once those spills have been "recorded" by the `spill_slot_allocator`, we can then ask it
    // to tell us all the reftyped spill slots at `iix.u`, and that's our stackmap! This routine
    // only computes the stackmap and the vectors of spills and reloads. It doesn't deal with
    // interleaving them into the final code sequence.
    //
    // Note that this scheme isn't as runtime-inefficient as it sounds, at least in the
    // SpiderMonkey use case and where `iix` is a call insn. That's because SM's calling
    // convention has no callee saved registers. Hence "real regs written by `iix`" will be
    // "all real regs" and so Safter will be empty. And Sbefore is in any case pretty small.
    //
    // (/me thinks ..) hmm, if Safter is empty, then what is the point of dumping Sbefore on the
    // stack before the GC? For r in Sbefore, either r is the only reference to some object, in
    // which case there's no point in presenting that ref to the GC since r is dead after call,
    // or r isn't the only ref to the object, in which case some other ref to it must exist
    // elsewhere in the stack, and that will keep the object alive. Maybe this needs a rethink.
    // Maybe the spills before the call should be only for the set Safter?

    let pt = InstPoint::new_use(iix);

    // Compute Sbefore.

    // FIXME change this to SparseSet
    let mut s_before = Set::<RealReg>::empty();

    let rci = univ.allocable_by_class[reftype_class.rc_to_usize()];
    if rci.is_none() {
        return Err(RegAllocError::Other(
            "stackmap request: no regs in specified reftype class".to_string(),
        ));
    }
    let rci = rci.unwrap();

    debug!("computing stackmap info at {:?}", pt);

    for rreg_no in rci.first..rci.last + 1 {
        // Get the RangeId, if any, assigned for `rreg_no` at `iix.u`. From that we can figure
        // out if it is reftyped.
        let mb_range_id = per_real_reg[rreg_no].committed.lookup_inst_point(pt);
        if let Some(range_id) = mb_range_id {
            // `rreg_no` is live at `iix.u`.
            let is_ref = if range_id.is_real() {
                debug!(
                    " real reg {:?} is real-range {:?}",
                    rreg_no,
                    rlr_env[range_id.to_real()]
                );
                rlr_env[range_id.to_real()].is_ref
            } else {
                debug!(
                    " real reg {:?} is virtual-range {:?}",
                    rreg_no,
                    vlr_env[range_id.to_virtual()]
                );
                vlr_env[range_id.to_virtual()].is_ref
            };
            if is_ref {
                // Finally .. we know that `rreg_no` is reftyped and live at `iix.u`.
                let rreg = univ.regs[rreg_no].0;
                s_before.insert(rreg);
            }
        }
    }

    debug!("Sbefore = {:?}", s_before);

    // Compute Safter.

    let mut s_after = s_before.clone();
    let bounds = &reg_vecs_and_bounds.bounds[iix];
    if bounds.mods_len != 0 {
        // Only the GC is allowed to modify reftyped regs at this insn!
        return Err(RegAllocError::Other(
            "stackmap request: safepoint insn modifies a reftyped reg".to_string(),
        ));
    }

    for i in bounds.defs_start..bounds.defs_start + bounds.defs_len as u32 {
        let r_defd = reg_vecs_and_bounds.vecs.defs[i as usize];
        if r_defd.is_real() && r_defd.get_class() == reftype_class {
            s_after.delete(r_defd.to_real_reg());
        }
    }

    debug!("Safter = {:?}", s_after);

    // Create the spill insns, as defined by Sbefore. This has the side effect of recording the
    // spill in `spill_slot_allocator`, so we can later ask it to tell us all the reftyped spill
    // slots.

    let frag = RangeFrag::new(InstPoint::new_reload(iix), InstPoint::new_spill(iix));

    let mut spill_insns = Vec::<InstToInsert>::new();
    let mut where_reg_got_spilled_to = Map::<RealReg, SpillSlot>::default();

    for from_reg in s_before.iter() {
        let to_slot = spill_slot_allocator.alloc_reftyped_spillslot_for_frag(frag.clone());
        let spill = InstToInsert::Spill {
            to_slot,
            from_reg: *from_reg,
            for_vreg: None, // spill isn't associated with any virtual reg
        };
        spill_insns.push(spill);
        // We also need to remember where we stashed it, so we can reload it, if it is in Safter.
        if s_after.contains(*from_reg) {
            where_reg_got_spilled_to.insert(*from_reg, to_slot);
        }
    }

    // Create the reload insns, as defined by Safter. Except, we might as well use the map we
    // just made, since its domain is the same as Safter.

    let mut reload_insns = Vec::<InstToInsert>::new();

    for (to_reg, from_slot) in where_reg_got_spilled_to.iter() {
        let reload = InstToInsert::Reload {
            to_reg: Writable::from_reg(*to_reg),
            from_slot: *from_slot,
            for_vreg: None, // reload isn't associated with any virtual reg
        };
        reload_insns.push(reload);
    }

    // And finally .. round up all the reftyped spill slots. That includes both "normal" spill
    // slots that happen to hold reftyped values, as well as the "extras" we created here, to
    // hold values of reftyped regs that are live over this instruction.

    let reftyped_spillslots = spill_slot_allocator.get_reftyped_spillslots_at_inst_point(pt);

    debug!("reftyped_spillslots = {:?}", reftyped_spillslots);

    // And we're done!

    Ok((spill_insns, reload_insns, reftyped_spillslots))
}
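
The Sbefore/Safter rule implemented above reduces to simple set algebra. A worked sketch with u32 register numbers standing in for RealRegs (hypothetical simplification):

use std::collections::HashSet;

fn stackmap_sets(
    live_reftyped_at_use: &HashSet<u32>, // Sbefore
    defined_by_insn: &HashSet<u32>,
) -> (HashSet<u32>, HashSet<u32>) {
    let s_before = live_reftyped_at_use.clone();
    // Safter = Sbefore minus the regs the instruction writes.
    let s_after: HashSet<u32> = s_before.difference(defined_by_insn).copied().collect();
    (s_before, s_after)
}

fn main() {
    let live: HashSet<u32> = [1, 2, 3].into_iter().collect();
    let defd: HashSet<u32> = [2].into_iter().collect();
    let (sb, sa) = stackmap_sets(&live, &defd);
    assert!(sb.contains(&2)); // spilled before the safepoint ...
    assert!(!sa.contains(&2)); // ... but not reloaded after it
    assert!(sa.contains(&1) && sa.contains(&3));
}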

//=============================================================================
// Allocator top level

@@ -471,9 +659,23 @@ impl fmt::Debug for EditListItem {
pub fn alloc_main<F: Function>(
    func: &mut F,
    reg_universe: &RealRegUniverse,
    stackmap_request: Option<&StackmapRequestInfo>,
    use_checker: bool,
    opts: &BacktrackingOptions,
) -> Result<RegAllocResult<F>, RegAllocError> {
    // -------- Initial arrangements for stackmaps --------
    let empty_vec_vregs = vec![];
    let empty_vec_iixs = vec![];
    let (client_wants_stackmaps, reftype_class, reftyped_vregs, safepoint_insns) =
        match stackmap_request {
            Some(&StackmapRequestInfo {
                reftype_class,
                ref reftyped_vregs,
                ref safepoint_insns,
            }) => (true, reftype_class, reftyped_vregs, safepoint_insns),
            None => (false, RegClass::INVALID, &empty_vec_vregs, &empty_vec_iixs),
        };

    // -------- Perform initial liveness analysis --------
    // Note that the analysis phase can fail; hence we propagate any error.
    let AnalysisInfo {
@@ -484,26 +686,38 @@ pub fn alloc_main<F: Function>(
        range_metrics: frag_metrics_env,
        estimated_frequencies: est_freqs,
        inst_to_block_map,
        ..
    } = run_analysis(func, reg_universe).map_err(|err| RegAllocError::Analysis(err))?;
        reg_to_ranges_maps: mb_reg_to_ranges_maps,
        move_info: mb_move_info,
    } = run_analysis(
        func,
        reg_universe,
        AlgorithmWithDefaults::Backtracking,
        client_wants_stackmaps,
        reftype_class,
        reftyped_vregs,
    )
    .map_err(|err| RegAllocError::Analysis(err))?;

    assert!(reg_vecs_and_bounds.is_sanitized());
    assert!(frag_env.len() == frag_metrics_env.len());
    assert!(mb_reg_to_ranges_maps.is_some()); // ensured by `run_analysis`
    assert!(mb_move_info.is_some()); // ensured by `run_analysis`
    let reg_to_ranges_maps = mb_reg_to_ranges_maps.unwrap();
    let move_info = mb_move_info.unwrap();

    // Also perform analysis that finds all coalesing opportunities.
    // Also perform analysis that finds all coalescing opportunities.
    let coalescing_info = do_coalescing_analysis(
        func,
        &reg_vecs_and_bounds,
        &reg_universe,
        &rlr_env,
        &mut vlr_env,
        &frag_env,
        &est_freqs,
        &reg_universe,
        &reg_to_ranges_maps,
        &move_info,
    );
    let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
    let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
    let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
    let vreg_to_vlrs_map: Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>> = coalescing_info.3;
    assert!(hints.len() == vlr_env.len());

    // -------- Alloc main --------
@@ -533,7 +747,8 @@ pub fn alloc_main<F: Function>(
        // PerRealReg
        per_real_reg.push(PerRealReg::new());
    }
    for rlr in rlr_env.iter() {
    for (rlrix_no, rlr) in rlr_env.iter().enumerate() {
        let rlrix = RealRangeIx::new(rlrix_no as u32);
        let rregIndex = rlr.rreg.get_index();
        // Ignore RealRanges for RealRegs that are not part of the allocatable
        // set. As far as the allocator is concerned, such RealRegs simply
@@ -541,7 +756,7 @@ pub fn alloc_main<F: Function>(
        if rregIndex >= reg_universe.allocable {
            continue;
        }
        per_real_reg[rregIndex].add_RealRange(&rlr, &frag_env);
        per_real_reg[rregIndex].add_RealRange(rlrix, &rlr_env, &frag_env);
    }

    let mut edit_list_move = Vec::<EditListItem>::new();
@@ -977,6 +1192,7 @@ pub fn alloc_main<F: Function>(

    let curr_vlr_vreg = curr_vlr.vreg;
    let curr_vlr_reg = curr_vlr_vreg.to_reg();
    let curr_vlr_is_ref = curr_vlr.is_ref;

    for frag in &curr_vlr.sorted_frags.frags {
        for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
@@ -1060,6 +1276,21 @@ pub fn alloc_main<F: Function>(
    }
    let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();

    // If we're spilling a reffy VLR, we'll need to tell the spillslot allocator that. The
    // VLR will already have been allocated to some spill slot, and relevant RangeFrags in
    // the slot should have already been reserved for it, by the above call to
    // `alloc_spill_slots` (although possibly relating to a prior VLR in the same
    // equivalence class, and not this one). However, those RangeFrags will have all been
    // marked non-reffy, because we don't know, in general, at spillslot-allocation-time,
    // whether a VLR will actually be spilled, and we don't want the resulting stack maps to
    // mention stack entries which are dead at the point of the safepoint insn. Hence the
    // need to update those RangeFrags pertaining to just this VLR -- now that we *know*
    // it's going to be spilled.
    if curr_vlr.is_ref {
        spill_slot_allocator
            .notify_spillage_of_reftyped_vlr(spill_slot_to_use, &curr_vlr.sorted_frags);
    }

    for sri in sri_vec {
        let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
            BridgeKind::RtoU => (Point::Reload, Point::Use),
@@ -1076,6 +1307,7 @@ pub fn alloc_main<F: Function>(
            vreg: curr_vlr_vreg,
            rreg: None,
            sorted_frags: new_vlr_sfrags,
            is_ref: curr_vlr_is_ref, // "inherit" refness
            size: 1,
            // Effectively infinite. We'll never look at this again anyway.
            total_cost: 0xFFFF_FFFFu32,
@@ -1109,7 +1341,7 @@ pub fn alloc_main<F: Function>(
            // allocated to the same reg as the destination of the
            // move. That means we have to find the VLR that owns
            // the destination vreg.
            for vlrix in &vreg_to_vlrs_map[dst_vreg.get_index()] {
            for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[dst_vreg.get_index()] {
                if vlr_env[*vlrix].vreg == dst_vreg {
                    new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                    break;
@@ -1120,7 +1352,7 @@ pub fn alloc_main<F: Function>(
            // Def-to-Spill bridge. Hint that we want to be
            // allocated to the same reg as the source of the
            // move.
            for vlrix in &vreg_to_vlrs_map[src_vreg.get_index()] {
            for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[src_vreg.get_index()] {
                if vlr_env[*vlrix].vreg == src_vreg {
                    new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                    break;
@@ -1315,7 +1547,7 @@ pub fn alloc_main<F: Function>(
    // Reload and spill instructions are missing. To generate them, go through
    // the "edit list", which contains info on both how to generate the
    // instructions, and where to insert them.
    let mut spills_n_reloads = Vec::<InstToInsertAndPoint>::new();
    let mut spills_n_reloads = Vec::<InstToInsertAndExtPoint>::new();
    let mut num_spills = 0; // stats only
    let mut num_reloads = 0; // stats only
    for eli in &edit_list_other {
@@ -1334,10 +1566,10 @@ pub fn alloc_main<F: Function>(
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToR = vlr_frag.first;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                num_reloads += 1;
            }
            BridgeKind::RtoS => {
@@ -1347,17 +1579,17 @@ pub fn alloc_main<F: Function>(
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToR = vlr_frag.first;
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = vlr_frag.last;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
                spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_reloads += 1;
                num_spills += 1;
            }
@@ -1368,10 +1600,10 @@ pub fn alloc_main<F: Function>(
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = vlr_frag.last;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_spills += 1;
            }
        }
@@ -1408,10 +1640,64 @@ pub fn alloc_main<F: Function>(
        }
    }

    // There is one of these for every entry in `safepoint_insns`.
    let mut stackmaps = Vec::<Vec<SpillSlot>>::new();

    if !safepoint_insns.is_empty() {
        info!("alloc_main: create safepoints and stackmaps");
        for safepoint_iix in safepoint_insns {
            // Create the stackmap artefacts for `safepoint_iix`. Save the stackmap (the
            // reftyped spillslots); we'll have to return it to the client as part of the
            // overall allocation result. The extra spill and reload instructions can simply
            // be added to `spills_n_reloads` though, and `edit_inst_stream` will correctly
            // merge them in.
            //
            // Note: this modifies `spill_slot_allocator`, since at this point we have to
            // allocate spill slots to hold reftyped real regs across the safepoint insn.
            //
            // Because the SB (spill-before) and RA (reload-after) `ExtPoint`s are "closer" to
            // the "core" of an instruction than the R (reload) and S (spill) `ExtPoint`s, any
            // "normal" reload or spill ranges that are reftyped will be handled correctly.
            // From `get_stackmap_artefacts_at`s point of view, such spill/reload ranges are
            // just like any other real-reg live range that it will have to spill around the
            // safepoint. The fact that they are for spills or reloads doesn't make any
            // difference.
            //
            // Note also: this call can fail; failure is propagated upwards.
            //
            // FIXME Passing these 3 small vectors around is inefficient. Use SmallVec or
            // (better) owned-by-this-function vectors instead.
            let (spills_before, reloads_after, reftyped_spillslots) = get_stackmap_artefacts_at(
                &mut spill_slot_allocator,
                &reg_universe,
                reftype_class,
                &reg_vecs_and_bounds,
                &per_real_reg,
                &rlr_env,
                &vlr_env,
                *safepoint_iix,
            )?;
            stackmaps.push(reftyped_spillslots);
            for spill_before in spills_before {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    spill_before,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::SpillBefore),
                ));
            }
            for reload_after in reloads_after {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    reload_after,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::ReloadAfter),
                ));
            }
        }
    }
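
The "closer to the core" remark above is about the relative order of insertion points within a single instruction. A sketch, assuming the six-way ordering Reload < SpillBefore < Use < Def < ReloadAfter < Spill for these extension points:

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum ExtPt {
    Reload,
    SpillBefore,
    Use,
    Def,
    ReloadAfter,
    Spill,
}

fn main() {
    // Safepoint spills/reloads land *inside* the ordinary reload/spill slots
    // for the same instruction, so "normal" reload and spill code stays
    // outermost and is seen by the stackmap machinery like any other range.
    assert!(ExtPt::Reload < ExtPt::SpillBefore);
    assert!(ExtPt::ReloadAfter < ExtPt::Spill);
}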

    info!("alloc_main: edit_inst_stream");

    let final_insns_and_targetmap__or_err = edit_inst_stream(
    let final_insns_and_targetmap_and_new_safepoints__or_err = edit_inst_stream(
        func,
        &safepoint_insns,
        spills_n_reloads,
        &iixs_to_nop_out,
        frag_map,
@@ -1423,7 +1709,7 @@ pub fn alloc_main<F: Function>(

    // ======== BEGIN Create the RegAllocResult ========

    match final_insns_and_targetmap__or_err {
    match final_insns_and_targetmap_and_new_safepoints__or_err {
        Ok((ref final_insns, ..)) => {
            info!(
                "alloc_main: out: VLRs: {} initially, {} processed",
@@ -1450,16 +1736,17 @@ pub fn alloc_main<F: Function>(
        }
    }

    let (final_insns, target_map, orig_insn_map) = match final_insns_and_targetmap__or_err {
        Err(e) => {
            info!("alloc_main: fail");
            return Err(e);
        }
        Ok(pair) => {
            info!("alloc_main: creating RegAllocResult");
            pair
        }
    };
    let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
        match final_insns_and_targetmap_and_new_safepoints__or_err {
            Err(e) => {
                info!("alloc_main: fail");
                return Err(e);
            }
            Ok(quad) => {
                info!("alloc_main: creating RegAllocResult");
                quad
            }
        };

    // Compute clobbered registers with one final, quick pass.
    //
@@ -1475,7 +1762,7 @@ pub fn alloc_main<F: Function>(

    let mut clobbered_registers: Set<RealReg> = Set::empty();

    // We'll dump all the reg uses in here. We don't care the bounds, so just
    // We'll dump all the reg uses in here. We don't care about the bounds, so just
    // pass a dummy one in the loop.
    let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
    let mut dummy_bounds = RegVecBounds::new();
@@ -1509,13 +1796,17 @@ pub fn alloc_main<F: Function>(
        block_annotations = Some(anns);
    }

    assert!(stackmaps.len() == safepoint_insns.len());
    assert!(new_safepoint_insns.len() == safepoint_insns.len());
    let ra_res = RegAllocResult {
        insns: final_insns,
        target_map,
        orig_insn_map,
        orig_insn_map: new_to_old_insn_map,
        clobbered_registers,
        num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
        block_annotations,
        stackmaps,
        new_safepoint_insns,
    };

    info!("alloc_main: end");

@@ -5,7 +5,7 @@

use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::data_structures::{
    cmp_range_frags, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
    cmp_range_frags, InstPoint, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
    VirtualRangeIx,
};
use crate::union_find::UnionFindEquivClasses;
@@ -28,6 +28,22 @@ use crate::Function;
//=============================================================================
// Logical spill slots

// In the trees, we keep track of which frags are reftyped, so we can later create stackmaps by
// slicing all of the trees at some `InstPoint`. Unfortunately this requires storing 65 bits of
// data in each node -- 64 bits for the RangeFrag and 1 bit for the reftype. A TODO would be to
// steal one bit from the RangeFrag. For now though, we do the simple thing.

#[derive(Clone, PartialEq, PartialOrd)]
struct RangeFragAndRefness {
    frag: RangeFrag,
    is_ref: bool,
}
impl RangeFragAndRefness {
    fn new(frag: RangeFrag, is_ref: bool) -> Self {
        Self { frag, is_ref }
    }
}
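
A quick check of the space cost mentioned in the comment above: a 64-bit fragment plus a one-bit flag rounds up to 16 bytes per element under default struct layout (u64 here is an assumed stand-in for the 64-bit RangeFrag):

struct FragAndRef {
    frag: u64,    // stand-in for the 64-bit RangeFrag
    is_ref: bool, // the extra "65th" bit
}

fn main() {
    // 8 bytes + 1 byte, padded out to the u64's 8-byte alignment.
    assert_eq!(std::mem::size_of::<FragAndRef>(), 16);
}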

// We keep one of these for every "logical spill slot" in use.
enum LogicalSpillSlot {
    // This slot is in use and can hold values of size `size` (only). Note that
@@ -36,7 +52,10 @@ enum LogicalSpillSlot {
    // `SpillSlotAllocator::slots`, the next `size` - 1 entries must be
    // `Unavail`. This is a hard invariant, violation of which will cause
    // overlapping spill slots and potential chaos.
    InUse { size: u32, tree: AVLTree<RangeFrag> },
    InUse {
        size: u32,
        tree: AVLTree<RangeFragAndRefness>,
    },
    // This slot is unavailable, as described above. It's unavailable because
    // it holds some part of the values associated with the nearest lower
    // numbered entry which isn't `Unavail`, and that entry must be an `InUse`
@@ -53,13 +72,13 @@ impl LogicalSpillSlot {
    fn is_InUse(&self) -> bool {
        !self.is_Unavail()
    }
    fn get_tree(&self) -> &AVLTree<RangeFrag> {
    fn get_tree(&self) -> &AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_tree"),
        }
    }
    fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFrag> {
    fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref mut tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_mut_tree"),
@@ -71,6 +90,62 @@ impl LogicalSpillSlot {
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_size"),
        }
    }
    // If this spill slot is occupied at `pt`, return the refness of the value (VirtualRange)
    // stored in it. This is conceptually equivalent to CommitmentMap::lookup_inst_point.
    fn get_refness_at_inst_point(&self, pt: InstPoint) -> Option<bool> {
        match self {
            LogicalSpillSlot::InUse { size: 1, tree } => {
                // Search the tree to see if a reffy commitment intersects `pt`.
                let mut root = tree.root;
                while root != AVL_NULL {
                    let root_node = &tree.pool[root as usize];
                    let root_item = &root_node.item;
                    if pt < root_item.frag.first {
                        // `pt` is to the left of the `root`. So there's no
                        // overlap with `root`. Continue by inspecting the left subtree.
                        root = root_node.left;
                    } else if root_item.frag.last < pt {
                        // Ditto for the right subtree.
                        root = root_node.right;
                    } else {
                        // `pt` overlaps the `root`, so we have what we want.
                        return Some(root_item.is_ref);
                    }
                }
                None
            }
            LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
                // Slot isn't in use, or is in use but for values of some non-ref size
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HELPER FUNCTION
|
||||
// Find out whether it is possible to add `frag` to `tree`.
|
||||
#[inline(always)]
|
||||
fn ssal_is_add_frag_possible(tree: &AVLTree<RangeFragAndRefness>, frag: &RangeFrag) -> bool {
|
||||
// BEGIN check `frag` for any overlap against `tree`.
|
||||
let mut root = tree.root;
|
||||
while root != AVL_NULL {
|
||||
let root_node = &tree.pool[root as usize];
|
||||
let root_item = &root_node.item;
|
||||
if frag.last < root_item.frag.first {
|
||||
// `frag` is entirely to the left of the `root`. So there's no
|
||||
// overlap with root. Continue by inspecting the left subtree.
|
||||
root = root_node.left;
|
||||
} else if root_item.frag.last < frag.first {
|
||||
// Ditto for the right subtree.
|
||||
root = root_node.right;
|
||||
} else {
|
||||
// `frag` overlaps the `root`. Give up.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// END check `frag` for any overlap against `tree`.
|
||||
// `frag` doesn't overlap.
|
||||
true
|
||||
}
+
+// HELPER FUNCTION

@@ -81,38 +156,23 @@ impl LogicalSpillSlot {
// no guarantee that elements of `frags` don't overlap `tree`. Hence we have
// to do a custom walk of `tree` to check for overlap; we can't just use
// `AVLTree::contains`.
-fn ssal_is_add_possible(tree: &AVLTree<RangeFrag>, frags: &SortedRangeFrags) -> bool {
+fn ssal_is_add_possible(tree: &AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Figure out whether all the frags will go in.
    for frag in &frags.frags {
-        // BEGIN check `frag` for any overlap against `tree`.
-        let mut root = tree.root;
-        while root != AVL_NULL {
-            let root_node = &tree.pool[root as usize];
-            let root_frag = root_node.item.clone();
-            if frag.last < root_frag.first {
-                // `frag` is entirely to the left of the `root`. So there's no
-                // overlap with root. Continue by inspecting the left subtree.
-                root = root_node.left;
-            } else if root_frag.last < frag.first {
-                // Ditto for the right subtree.
-                root = root_node.right;
-            } else {
-                // `frag` overlaps the `root`. Give up.
-                return false;
-            }
-        }
+        if !ssal_is_add_frag_possible(&tree, frag) {
+            return false;
+        }
        // END check `frag` for any overlap against `tree`.
        // `frag` doesn't overlap. Move on to the next one.
    }
    true
}

// HELPER FUNCTION
-// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if
-// not possible. If `false` is returned, `tree` is unchanged (this is
-// important). This routine relies on the fact that SortedFrags is
-// non-overlapping.
-fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags) -> bool {
+// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if not possible. If
+// `false` is returned, `tree` is unchanged (this is important). This routine relies on the
+// fact that SortedFrags is non-overlapping. They are initially all marked as non-reffy. That
+// may later be changed by calls to `SpillSlotAllocator::notify_spillage_of_reftyped_vlr`.
+fn ssal_add_if_possible(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Check if all the frags will go in.
    if !ssal_is_add_possible(tree, frags) {
        return false;
@@ -120,8 +180,10 @@ fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags)
    // They will. So now insert them.
    for frag in &frags.frags {
        let inserted = tree.insert(
-            frag.clone(),
-            Some(&|frag1, frag2| cmp_range_frags(&frag1, &frag2)),
+            RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false),
+            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                cmp_range_frags(&item1.frag, &item2.frag)
+            }),
        );
        // This can't fail
        assert!(inserted);
@@ -129,6 +191,27 @@ fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags)
    true
}

+// HELPER FUNCTION
+// Let `frags` be the RangeFrags for some VirtualRange that have already been allocated in
+// `tree`. Mark each such RangeFrag as reffy.
+fn ssal_mark_frags_as_reftyped(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) {
+    for frag in &frags.frags {
+        // Be paranoid. (1) `frag` must already exist in `tree`. (2) it must not be marked as
+        // reffy.
+        let del_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false);
+        let add_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ true);
+        let replaced_ok = tree.find_and_replace(
+            del_this,
+            add_this,
+            &|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                cmp_range_frags(&item1.frag, &item2.frag)
+            },
+        );
+        // This assertion effectively encompasses both (1) and (2) above.
+        assert!(replaced_ok);
+    }
+}
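
The trick that makes this work is that the tree's comparator looks only at the `frag` field, so a refness flip is a find-and-replace of an equal-keyed item. A standalone sketch of that design point, with simplified stand-in types rather than the crate's API:

use std::cmp::Ordering;
use std::collections::BTreeSet;

// Stand-in for RangeFragAndRefness: ordered (and compared) by `frag` only, so
// two items differing only in `is_ref` occupy the same tree position.
#[derive(Clone)]
struct Item { frag: (u32, u32), is_ref: bool }

impl PartialEq for Item {
    fn eq(&self, other: &Self) -> bool { self.frag == other.frag }
}
impl Eq for Item {}
impl Ord for Item {
    fn cmp(&self, other: &Self) -> Ordering { self.frag.cmp(&other.frag) }
}
impl PartialOrd for Item {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
}

// Mark an existing item as reffy: remove-then-insert under the same key,
// mirroring the find_and_replace in `ssal_mark_frags_as_reftyped`.
fn mark_reffy(tree: &mut BTreeSet<Item>, frag: (u32, u32)) -> bool {
    tree.remove(&Item { frag, is_ref: false }) && tree.insert(Item { frag, is_ref: true })
}

fn main() {
    let mut tree = BTreeSet::new();
    tree.insert(Item { frag: (3, 7), is_ref: false });
    assert!(mark_reffy(&mut tree, (3, 7)));
}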

//=============================================================================
// SpillSlotAllocator: public interface

@@ -155,9 +238,11 @@ impl SpillSlotAllocator {
        while self.slots.len() % (req_size as usize) != 0 {
            self.slots.push(LogicalSpillSlot::Unavail);
        }
-        // And now the new slot.
-        let dflt = RangeFrag::invalid_value();
-        let tree = AVLTree::<RangeFrag>::new(dflt);
+        // And now the new slot. The `dflt` value is needed by `AVLTree` to initialise storage
+        // slots for tree nodes, but we will never actually see those values. So it doesn't
+        // matter what they are.
+        let dflt = RangeFragAndRefness::new(RangeFrag::invalid_value(), false);
+        let tree = AVLTree::<RangeFragAndRefness>::new(dflt);
        let res = self.slots.len() as u32;
        self.slots.push(LogicalSpillSlot::InUse {
            size: req_size,
@@ -176,6 +261,7 @@ impl SpillSlotAllocator {
        res
    }

    // THE MAIN FUNCTION
    // Allocate spill slots for all the VirtualRanges in `vlrix`'s eclass,
    // including `vlrix` itself. Since we are allocating spill slots for
    // complete eclasses at once, none of the members of the class should
@@ -191,8 +277,25 @@ impl SpillSlotAllocator {
        vlrEquivClasses: &UnionFindEquivClasses<VirtualRangeIx>,
        vlrix: VirtualRangeIx,
    ) {
+        let is_ref = vlr_env[vlrix].is_ref;
        for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+            // "None of the VLRs in this equivalence class have an allocated spill slot."
+            // This should be true because we allocate spill slots for all of the members of an
+            // eclass at once.
            assert!(vlr_slot_env[cand_vlrix].is_none());

+            // "All of the VLRs in this eclass have the same ref-ness as this VLR."
+            // Why this is true is a bit subtle. The equivalence classes are computed by
+            // `do_coalescing_analysis`, fundamentally by looking at all the move instructions
+            // and computing the transitive closure induced by them. The ref-ness annotations
+            // on each VLR are computed in `do_reftypes_analysis`, and they are also computed
+            // as a transitive closure on the same move instructions. Hence the results should
+            // be identical.
+            //
+            // With all that said, note that these equivalence classes are *not* guaranteed to
+            // be internally non-overlapping. This is explained in the big block comment at the
+            // top of bt_coalescing_analysis.rs.
+            assert!(vlr_env[cand_vlrix].is_ref == is_ref);
        }

        // Do this in two passes. It's a bit cumbersome.
@@ -243,6 +346,12 @@ impl SpillSlotAllocator {
        let req_size = func.get_spillslot_size(vlrix_vreg.get_class(), vlrix_vreg);
        assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);

+        // Sanity check: if the VLR is reftyped, then it must need a 1-word slot
+        // (anything else is nonsensical).
+        if is_ref {
+            assert!(req_size == 1);
+        }

        // Pass 1: find a slot which can take all VirtualRanges in `vlrix`'s
        // eclass when tested individually.
        //
@@ -344,4 +453,70 @@ impl SpillSlotAllocator {
            /*NOTREACHED*/
        } /* 'pass2_per_equiv_class */
    }

+    // STACKMAP SUPPORT
+    // Mark the `frags` for `slot_no` as being reftyped. They are expected to already exist in
+    // the relevant tree, and not currently be marked as reftyped.
+    pub fn notify_spillage_of_reftyped_vlr(
+        &mut self,
+        slot_no: SpillSlot,
+        frags: &SortedRangeFrags,
+    ) {
+        let slot_ix = slot_no.get_usize();
+        assert!(slot_ix < self.slots.len());
+        let slot = &mut self.slots[slot_ix];
+        match slot {
+            LogicalSpillSlot::InUse { size, tree } if *size == 1 => {
+                ssal_mark_frags_as_reftyped(tree, frags)
+            }
+            _ => panic!("SpillSlotAllocator::notify_spillage_of_reftyped_vlr: invalid slot"),
+        }
+    }
+
+    // STACKMAP SUPPORT
+    // Allocate a size-1 (word!) spill slot for `frag` and return it. The slot is marked
+    // reftyped so that a later call to `get_reftyped_spillslots_at_inst_point` will return it.
+    pub fn alloc_reftyped_spillslot_for_frag(&mut self, frag: RangeFrag) -> SpillSlot {
+        for i in 0..self.slots.len() {
+            match &mut self.slots[i] {
+                LogicalSpillSlot::InUse { size: 1, tree } => {
+                    if ssal_is_add_frag_possible(&tree, &frag) {
+                        // We're in luck.
+                        let inserted = tree.insert(
+                            RangeFragAndRefness::new(frag, /*is_ref=*/ true),
+                            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                                cmp_range_frags(&item1.frag, &item2.frag)
+                            }),
+                        );
+                        // This can't fail -- we just checked for it!
+                        assert!(inserted);
+                        return SpillSlot::new(i as u32);
+                    }
+                    // Otherwise move on.
+                }
+                LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
+                    // Slot isn't in use, or is in use but for values of some non-ref size.
+                    // Move on.
+                }
+            }
+        }
+        // We tried all slots, but without success. Add a new one and try again. This time we
+        // must succeed. Calling recursively is a bit stupid in the sense that we then search
+        // again to find the slot we just allocated, but hey.
+        self.add_new_slot(1 /*word*/);
+        self.alloc_reftyped_spillslot_for_frag(frag) // \o/ tailcall \o/
+    }
+
+    // STACKMAP SUPPORT
+    // Examine all the spill slots at `pt` and return those that are reftyped. This is
+    // fundamentally what creates a stack map.
+    pub fn get_reftyped_spillslots_at_inst_point(&self, pt: InstPoint) -> Vec<SpillSlot> {
+        let mut res = Vec::<SpillSlot>::new();
+        for (i, slot) in self.slots.iter().enumerate() {
+            if slot.get_refness_at_inst_point(pt) == Some(true) {
+                res.push(SpillSlot::new(i as u32));
+            }
+        }
+        res
+    }
+}
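
Taken together, a stackmap at a safepoint is just the set of slot indices whose occupying value is reffy at that point. A minimal standalone sketch of that scan, with a simplified slot type standing in for LogicalSpillSlot:

// Simplified stand-in: a slot either holds a value over an inclusive range of
// instruction points, with a refness flag, or is empty.
enum Slot {
    InUse { first: u32, last: u32, is_ref: bool },
    Free,
}

// Mirrors `get_reftyped_spillslots_at_inst_point`: collect the indices of all
// slots holding a reffy value that is live at `pt`.
fn stackmap_at(slots: &[Slot], pt: u32) -> Vec<usize> {
    slots
        .iter()
        .enumerate()
        .filter_map(|(i, s)| match s {
            Slot::InUse { first, last, is_ref: true } if *first <= pt && pt <= *last => Some(i),
            _ => None,
        })
        .collect()
}

fn main() {
    let slots = [
        Slot::InUse { first: 0, last: 9, is_ref: true },
        Slot::InUse { first: 0, last: 9, is_ref: false },
        Slot::Free,
    ];
    assert_eq!(stackmap_at(&slots, 5), vec![0]); // only slot 0 is reffy at pt 5
}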

@@ -57,10 +57,9 @@

use crate::analysis_data_flow::get_san_reg_sets_for_insn;
use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, Map, Point, RealReg, RealRegUniverse, Reg, RegSets, SpillSlot,
-    VirtualReg, Writable,
+    BlockIx, InstIx, Map, RealReg, RealRegUniverse, Reg, RegSets, SpillSlot, VirtualReg, Writable,
};
-use crate::inst_stream::InstToInsertAndPoint;
+use crate::inst_stream::{ExtPoint, InstExtPoint, InstToInsertAndExtPoint};
use crate::{Function, RegUsageMapper};

use std::collections::VecDeque;
@@ -478,10 +477,11 @@ impl Checker {
    }
}

-/// A wrapper around `Checker` that assists its use with `InstsAndPoints` and `Function` together.
+/// A wrapper around `Checker` that assists its use with `InstToInsertAndExtPoint`s and
+/// `Function` together.
pub(crate) struct CheckerContext {
    checker: Checker,
-    checker_inst_map: Map<InstPoint, Vec<Inst>>,
+    checker_inst_map: Map<InstExtPoint, Vec<Inst>>,
}

impl CheckerContext {
@@ -490,16 +490,12 @@ impl CheckerContext {
    pub(crate) fn new<F: Function>(
        f: &F,
        ru: &RealRegUniverse,
-        insts_to_add: &Vec<InstToInsertAndPoint>,
+        insts_to_add: &Vec<InstToInsertAndExtPoint>,
    ) -> CheckerContext {
-        let mut checker_inst_map: Map<InstPoint, Vec<Inst>> = Map::default();
-        for &InstToInsertAndPoint {
-            ref inst,
-            ref point,
-        } in insts_to_add
-        {
+        let mut checker_inst_map: Map<InstExtPoint, Vec<Inst>> = Map::default();
+        for &InstToInsertAndExtPoint { ref inst, ref iep } in insts_to_add {
            let checker_insts = checker_inst_map
-                .entry(point.clone())
+                .entry(iep.clone())
                .or_insert_with(|| vec![]);
            checker_insts.push(inst.to_checker_inst());
        }
@@ -521,8 +517,8 @@ impl CheckerContext {
        mapper: &RUM,
    ) -> Result<(), CheckerErrors> {
        let empty = vec![];
-        let pre_point = InstPoint::new(iix, Point::Reload);
-        let post_point = InstPoint::new(iix, Point::Spill);
+        let pre_point = InstExtPoint::new(iix, ExtPoint::Reload);
+        let post_point = InstExtPoint::new(iix, ExtPoint::Spill);

        for checker_inst in self.checker_inst_map.get(&pre_point).unwrap_or(&empty) {
            debug!("at inst {:?}: pre checker_inst: {:?}", iix, checker_inst);

@@ -1194,7 +1194,7 @@ pub struct RealRegUniverse {
    pub regs: Vec<(RealReg, String)>,

    // This is the size of the initial section of `regs` that is available to
-    // the allocator. It must be < `regs`.len().
+    // the allocator. It must be <= `regs`.len().
    pub allocable: usize,

    // Information about groups of allocable registers. Used to quickly address
@@ -1794,6 +1794,22 @@ impl SortedRangeFragIxs {
        res.check(fenv);
        res
    }

+    /// Does this sorted list of range fragments contain the given instruction point?
+    pub fn contains_pt(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>, pt: InstPoint) -> bool {
+        self.frag_ixs
+            .binary_search_by(|&ix| {
+                let frag = &fenv[ix];
+                if pt < frag.first {
+                    Ordering::Greater
+                } else if pt >= frag.first && pt <= frag.last {
+                    Ordering::Equal
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok()
+    }
}
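
Both `contains_pt` methods assume the fragment list is sorted and mutually non-overlapping; under that precondition a `binary_search_by` point query agrees with a linear scan. A small self-contained check of that equivalence (simplified fragment type, not the crate's own):

use std::cmp::Ordering;

#[derive(Clone, Copy)]
struct Frag { first: u32, last: u32 }

// Point-membership query in the style of `contains_pt`: the comparator orders
// each fragment relative to `pt`, so the search lands on any fragment whose
// inclusive range covers `pt`.
fn contains_pt(sorted: &[Frag], pt: u32) -> bool {
    sorted
        .binary_search_by(|frag| {
            if pt < frag.first {
                Ordering::Greater
            } else if pt <= frag.last {
                Ordering::Equal
            } else {
                Ordering::Less
            }
        })
        .is_ok()
}

fn main() {
    let frags = [Frag { first: 2, last: 4 }, Frag { first: 8, last: 8 }];
    // Agreement with the obvious linear scan, given the sortedness precondition.
    for pt in 0..12 {
        let linear = frags.iter().any(|f| f.first <= pt && pt <= f.last);
        assert_eq!(contains_pt(&frags, pt), linear);
    }
}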

//=============================================================================

@@ -1856,6 +1872,21 @@ impl SortedRangeFrags {
            }
        }
    }

+    /// Does this sorted list of range fragments contain the given instruction point?
+    pub fn contains_pt(&self, pt: InstPoint) -> bool {
+        self.frags
+            .binary_search_by(|frag| {
+                if pt < frag.first {
+                    Ordering::Greater
+                } else if pt >= frag.first && pt <= frag.last {
+                    Ordering::Equal
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok()
+    }
}

//=============================================================================

@@ -1997,19 +2028,27 @@ impl SpillCost {
pub struct RealRange {
    pub rreg: RealReg,
    pub sorted_frags: SortedRangeFragIxs,
+    pub is_ref: bool,
}

impl fmt::Debug for RealRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "(RR: {:?}, {:?})", self.rreg, self.sorted_frags)
+        write!(
+            fmt,
+            "(RR: {:?}{}, {:?})",
+            self.rreg,
+            if self.is_ref { " REF" } else { "" },
+            self.sorted_frags
+        )
    }
}

impl RealRange {
    pub fn show_with_rru(&self, univ: &RealRegUniverse) -> String {
        format!(
-            "(RR: {}, {:?})",
+            "(RR: {}{}, {:?})",
            self.rreg.to_reg().show_with_rru(univ),
+            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
@@ -2026,6 +2065,7 @@ pub struct VirtualRange {
    pub vreg: VirtualReg,
    pub rreg: Option<RealReg>,
    pub sorted_frags: SortedRangeFrags,
+    pub is_ref: bool,
    pub size: u16,
    pub total_cost: u32,
    pub spill_cost: SpillCost, // == total_cost / size
@@ -2039,7 +2079,12 @@ impl VirtualRange {

impl fmt::Debug for VirtualRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "(VR: {:?},", self.vreg)?;
+        write!(
+            fmt,
+            "(VR: {:?}{},",
+            self.vreg,
+            if self.is_ref { " REF" } else { "" }
+        )?;
        if self.rreg.is_some() {
            write!(fmt, " -> {:?}", self.rreg.unwrap())?;
        }
@@ -2051,6 +2096,109 @@ impl fmt::Debug for VirtualRange {
    }
}

+//=============================================================================
+// Some auxiliary/miscellaneous data structures that are useful.
+
+// Mappings from RealRegs and VirtualRegs to the sets of RealRanges and VirtualRanges that
+// belong to them. These are needed for BT's coalescing analysis and for the dataflow analysis
+// that supports reftype handling.
+
+pub struct RegToRangesMaps {
+    // This maps RealReg indices to the set of RealRangeIxs for that RealReg. Valid indices are
+    // real register indices for all non-sanitised real regs; that is,
+    // 0 .. RealRegUniverse::allocable, for ".." having the Rust meaning. The Vecs of
+    // RealRangeIxs are duplicate-free. They are Vec rather than SmallVec because they are often
+    // large, so SmallVec would just be a disadvantage here.
+    pub rreg_to_rlrs_map: Vec</*real reg ix, */ Vec<RealRangeIx>>,
+
+    // This maps VirtualReg indices to the set of VirtualRangeIxs for that VirtualReg. Valid
+    // indices are 0 .. Function::get_num_vregs(). For functions mostly translated from SSA,
+    // most VirtualRegs will have just one VirtualRange, and there are a lot of VirtualRegs in
+    // general. So SmallVec is a definite benefit here.
+    pub vreg_to_vlrs_map: Vec</*virtual reg ix, */ SmallVec<[VirtualRangeIx; 3]>>,
+}
+
+// MoveInfo holds info about registers connected by moves. For each, we record the source and
+// destination of the move, the insn performing the move, and the estimated execution frequency
+// of the containing block. The moves are not presented in any particular order, but they are
+// duplicate-free in that each such instruction will be listed only once.
+
+pub struct MoveInfoElem {
+    pub dst: Reg,
+    pub dst_range: RangeId, // possibly RangeId::invalid_value() if not requested
+    pub src: Reg,
+    pub src_range: RangeId, // possibly RangeId::invalid_value() if not requested
+    pub iix: InstIx,
+    pub est_freq: u32,
+}

+pub struct MoveInfo {
+    pub moves: Vec<MoveInfoElem>,
+}
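
For orientation, here is an illustrative construction of that move-info shape; the `Reg`, `RangeId`, and `InstIx` stand-ins below are simplified newtypes, not the crate's real types:

// Shape of the per-move record collected for coalescing and reftype analysis.
#[derive(Clone, Copy, Debug)]
struct Reg(u32);
#[derive(Clone, Copy, Debug)]
struct RangeId(u32);
#[derive(Clone, Copy, Debug)]
struct InstIx(u32);

struct MoveInfoElem {
    dst: Reg,
    dst_range: RangeId,
    src: Reg,
    src_range: RangeId,
    iix: InstIx,
    est_freq: u32,
}

struct MoveInfo {
    moves: Vec<MoveInfoElem>,
}

fn main() {
    // One move `v2 := v1` at instruction 7, in a block with estimated
    // execution frequency 4; range ids not requested, so a sentinel is used.
    let invalid = RangeId(0xFFFF_FFFF);
    let mi = MoveInfo {
        moves: vec![MoveInfoElem {
            dst: Reg(2), dst_range: invalid,
            src: Reg(1), src_range: invalid,
            iix: InstIx(7), est_freq: 4,
        }],
    };
    assert_eq!(mi.moves.len(), 1);
}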

+// Something that can be either a VirtualRangeIx or a RealRangeIx, whilst still being 32 bits
+// (by stealing one bit from those spaces). Note that the resulting thing no longer denotes a
+// contiguous index space, and so it has a name that indicates it is an identifier rather than
+// an index.
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub struct RangeId {
+    // 1 X--(31)--X is a RealRangeIx with value X--(31)--X
+    // 0 X--(31)--X is a VirtualRangeIx with value X--(31)--X
+    bits: u32,
+}
+
+impl RangeId {
+    #[inline(always)]
+    pub fn new_real(rlrix: RealRangeIx) -> Self {
+        let n = rlrix.get();
+        assert!(n <= 0x7FFF_FFFF);
+        Self {
+            bits: n | 0x8000_0000,
+        }
+    }
+    #[inline(always)]
+    pub fn new_virtual(vlrix: VirtualRangeIx) -> Self {
+        let n = vlrix.get();
+        assert!(n <= 0x7FFF_FFFF);
+        Self { bits: n }
+    }
+    #[inline(always)]
+    pub fn is_real(self) -> bool {
+        self.bits & 0x8000_0000 != 0
+    }
+    #[allow(dead_code)]
+    #[inline(always)]
+    pub fn is_virtual(self) -> bool {
+        self.bits & 0x8000_0000 == 0
+    }
+    #[inline(always)]
+    pub fn to_real(self) -> RealRangeIx {
+        assert!(self.bits & 0x8000_0000 != 0);
+        RealRangeIx::new(self.bits & 0x7FFF_FFFF)
+    }
+    #[inline(always)]
+    pub fn to_virtual(self) -> VirtualRangeIx {
+        assert!(self.bits & 0x8000_0000 == 0);
+        VirtualRangeIx::new(self.bits)
+    }
+    #[inline(always)]
+    pub fn invalid_value() -> Self {
+        // Real, and implausibly huge
+        Self { bits: 0xFFFF_FFFF }
+    }
+}
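
The tag-bit encoding is easy to sanity-check in isolation. A standalone sketch with plain u32 indices in place of RealRangeIx/VirtualRangeIx:

// Standalone model of RangeId's bit-stealing scheme: the top bit tags the
// namespace, the low 31 bits carry the index.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Id { bits: u32 }

impl Id {
    fn new_real(n: u32) -> Self {
        assert!(n <= 0x7FFF_FFFF);
        Id { bits: n | 0x8000_0000 }
    }
    fn new_virtual(n: u32) -> Self {
        assert!(n <= 0x7FFF_FFFF);
        Id { bits: n }
    }
    fn is_real(self) -> bool { self.bits & 0x8000_0000 != 0 }
    fn to_real(self) -> u32 {
        assert!(self.is_real());
        self.bits & 0x7FFF_FFFF
    }
    fn to_virtual(self) -> u32 {
        assert!(!self.is_real());
        self.bits
    }
}

fn main() {
    // Round trips preserve the index, and the two namespaces never collide.
    assert_eq!(Id::new_real(42).to_real(), 42);
    assert_eq!(Id::new_virtual(42).to_virtual(), 42);
    assert_ne!(Id::new_real(42), Id::new_virtual(42));
}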

+impl fmt::Debug for RangeId {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_real() {
+            self.to_real().fmt(fmt)
+        } else {
+            self.to_virtual().fmt(fmt)
+        }
+    }
+}

//=============================================================================
// Test cases

@@ -1,7 +1,7 @@
use crate::checker::Inst as CheckerInst;
use crate::checker::{CheckerContext, CheckerErrors};
use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, RangeFrag, RealReg, RealRegUniverse, SpillSlot, TypedIxVec,
+    BlockIx, InstIx, InstPoint, Point, RangeFrag, RealReg, RealRegUniverse, SpillSlot, TypedIxVec,
    VirtualReg, Writable,
};
use crate::{reg_maps::VrangeRegUsageMapper, Function, RegAllocError};
@@ -17,12 +17,12 @@ pub(crate) enum InstToInsert {
    Spill {
        to_slot: SpillSlot,
        from_reg: RealReg,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    },
    Reload {
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    },
    Move {
        to_reg: Writable<RealReg>,
@@ -76,14 +76,112 @@ impl InstToInsert {
    }
}

-pub(crate) struct InstToInsertAndPoint {
-    pub(crate) inst: InstToInsert,
-    pub(crate) point: InstPoint,
+// ExtPoint is an extended version of Point. It plays no role in dataflow analysis or in the
+// specification of live ranges. It exists only to describe where to place the "extra"
+// spill/reload instructions required to make stackmap/reftype support work. If there were no
+// need to support stackmaps/reftypes, ExtPoint would not be needed, and Point would be
+// adequate.
+//
+// Recall that Point can denote 4 places within an instruction, with R < U < D < S:
+//
+// * R(eload): this is where any reload insns for the insn itself are
+//   considered to live.
+//
+// * U(se): this is where the insn is considered to use values from those of
+//   its register operands that appear in a Read or Modify role.
+//
+// * D(ef): this is where the insn is considered to define new values for
+//   those of its register operands that appear in a Write or Modify role.
+//
+// * S(pill): this is where any spill insns for the insn itself are considered
+//   to live.
+//
+// ExtPoint extends that to six places, by adding a new point in between Reload and Use, and one
+// between Def and Spill, giving: R < SB < U < D < RA < S:
+//
+// * (R)eload: unchanged
+//
+// * SB (Spill before): at this point, reftyped regs will be spilled, if this insn is a safepoint
+//
+// * (U)se: unchanged
+//
+// * (D)ef: unchanged
+//
+// * RA (Reload after): at this point, reftyped regs spilled at SB will be reloaded, if needed,
+//   and if this insn is a safepoint
+//
+// * (S)pill: unchanged
+//
+// From this it can be seen that the SB and RA points are closest to the instruction "core" --
+// the U and D points. SB and RA describe places where reftyped regs must be spilled/reloaded
+// around the core. Because the SB-RA range falls inside the R-S range, it means that the
+// safepoint spill/reload instructions can be added after "normal" spill/reload instructions
+// have been created, and it doesn't interact with the logic to create those "normal"
+// spill/reload instructions.
+//
+// In the worst case scenario, a value could be reloaded at R, immediately spilled at SB, then
+// possibly modified in memory at the safepoint proper, reloaded at RA, and spilled at S. That
+// is considered to be an unlikely scenario, though.
+
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum ExtPoint {
+    Reload = 0,
+    SpillBefore = 1,
+    Use = 2,
+    Def = 3,
+    ReloadAfter = 4,
+    Spill = 5,
+}

-impl InstToInsertAndPoint {
-    pub(crate) fn new(inst: InstToInsert, point: InstPoint) -> Self {
-        Self { inst, point }
+impl ExtPoint {
+    // Promote a Point to an ExtPoint
+    #[inline(always)]
+    pub fn from_point(pt: Point) -> Self {
+        match pt {
+            Point::Reload => ExtPoint::Reload,
+            Point::Use => ExtPoint::Use,
+            Point::Def => ExtPoint::Def,
+            Point::Spill => ExtPoint::Spill,
+        }
    }
}

+// As the direct analogy to InstPoint, an InstExtPoint pairs an InstIx with an ExtPoint. In
+// contrast to InstPoint, these aren't so performance critical, so there's no fancy bit-packed
+// representation as there is for InstPoint.
+
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct InstExtPoint {
+    pub iix: InstIx,
+    pub extpt: ExtPoint,
+}
+
+impl InstExtPoint {
+    #[inline(always)]
+    pub fn new(iix: InstIx, extpt: ExtPoint) -> Self {
+        Self { iix, extpt }
+    }
+    // Promote an InstPoint to an InstExtPoint
+    #[inline(always)]
+    pub fn from_inst_point(inst_pt: InstPoint) -> Self {
+        InstExtPoint {
+            iix: inst_pt.iix(),
+            extpt: ExtPoint::from_point(inst_pt.pt()),
+        }
+    }
+}
+
+// So, finally, we can specify what we want: an instruction to insert, and a place to insert it.
+
+pub(crate) struct InstToInsertAndExtPoint {
+    pub(crate) inst: InstToInsert,
+    pub(crate) iep: InstExtPoint,
+}
+
+impl InstToInsertAndExtPoint {
+    #[inline(always)]
+    pub(crate) fn new(inst: InstToInsert, iep: InstExtPoint) -> Self {
+        Self { inst, iep }
+    }
+}

@@ -96,7 +194,7 @@ impl InstToInsertAndPoint {
fn map_vregs_to_rregs<F: Function>(
    func: &mut F,
    frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
-    insts_to_add: &Vec<InstToInsertAndPoint>,
+    insts_to_add: &Vec<InstToInsertAndExtPoint>,
    iixs_to_nop_out: &Vec<InstIx>,
    reg_universe: &RealRegUniverse,
    use_checker: bool,
@@ -391,12 +489,14 @@ fn map_vregs_to_rregs<F: Function>(
#[inline(never)]
pub(crate) fn add_spills_reloads_and_moves<F: Function>(
    func: &mut F,
-    mut insts_to_add: Vec<InstToInsertAndPoint>,
+    safepoint_insns: &Vec<InstIx>,
+    mut insts_to_add: Vec<InstToInsertAndExtPoint>,
) -> Result<
    (
        Vec<F::Inst>,
        TypedIxVec<BlockIx, InstIx>,
        TypedIxVec<InstIx, InstIx>,
+        Vec<InstIx>,
    ),
    String,
> {
@@ -407,20 +507,31 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
    // We also need to examine and update Func::blocks. This is assumed to
    // be arranged in ascending order of the Block::start fields.
    //
+    // Also, if the client requested stackmap creation, then `safepoint_insns` will be
+    // non-empty, and we will have to return a vector of the same length that indicates the
+    // location of each safepoint insn in the final code. `safepoint_insns` is assumed to be
+    // sorted in ascending order and duplicate-free.
+    //
    // Linear scan relies on the sort being stable here, so make sure to not
    // use an unstable sort. See the comment in `resolve_moves_across blocks`
    // in linear scan's code.

-    insts_to_add.sort_by_key(|mem_move| mem_move.point);
+    insts_to_add.sort_by_key(|to_add| to_add.iep.clone());

    let mut cur_inst_to_add = 0;
    let mut cur_block = BlockIx::new(0);

    let mut insns: Vec<F::Inst> = vec![];
    let mut target_map: TypedIxVec<BlockIx, InstIx> = TypedIxVec::new();
-    let mut orig_insn_map: TypedIxVec<InstIx, InstIx> = TypedIxVec::new();
+    let mut new_to_old_insn_map: TypedIxVec<InstIx, InstIx> = TypedIxVec::new();
    target_map.reserve(func.blocks().len());
-    orig_insn_map.reserve(func.insn_indices().len() + insts_to_add.len());
+    new_to_old_insn_map.reserve(func.insn_indices().len() + insts_to_add.len());

+    // Index in `safepoint_insns` of the next safepoint insn we will encounter
+    let mut next_safepoint_insn_index = 0;
+    let mut new_safepoint_insns = Vec::<InstIx>::new();
+    new_safepoint_insns.reserve(safepoint_insns.len());

    for iix in func.insn_indices() {
        // Is `iix` the first instruction in a block? Meaning, are we
@@ -431,27 +542,33 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
            target_map.push(InstIx::new(insns.len() as u32));
        }

-        // Copy to the output vector, the extra insts that are to be placed at the
-        // reload point of `iix`.
+        // Copy to the output vector, first, the extra insts that are to be placed at the
+        // reload point of `iix`, and then the extras for the spill-before point of `iix`.
        while cur_inst_to_add < insts_to_add.len()
-            && insts_to_add[cur_inst_to_add].point == InstPoint::new_reload(iix)
+            && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::SpillBefore)
        {
            insns.push(insts_to_add[cur_inst_to_add].inst.construct(func));
-            orig_insn_map.push(InstIx::invalid_value());
+            new_to_old_insn_map.push(InstIx::invalid_value());
            cur_inst_to_add += 1;
        }

        // Copy the inst at `iix` itself
-        orig_insn_map.push(iix);
+        if next_safepoint_insn_index < safepoint_insns.len()
+            && iix == safepoint_insns[next_safepoint_insn_index]
+        {
+            new_safepoint_insns.push(InstIx::new(insns.len() as u32));
+            next_safepoint_insn_index += 1;
+        }
+        new_to_old_insn_map.push(iix);
        insns.push(func.get_insn(iix).clone());

-        // And copy the extra insts that are to be placed at the spill point of
-        // `iix`.
+        // And copy, first, the extra insts that are to be placed at the reload-after point
+        // of `iix`, followed by those to be placed at the spill point of `iix`.
        while cur_inst_to_add < insts_to_add.len()
-            && insts_to_add[cur_inst_to_add].point == InstPoint::new_spill(iix)
+            && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::Spill)
        {
            insns.push(insts_to_add[cur_inst_to_add].inst.construct(func));
-            orig_insn_map.push(InstIx::invalid_value());
+            new_to_old_insn_map.push(InstIx::invalid_value());
            cur_inst_to_add += 1;
        }

@@ -464,8 +581,10 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(

    debug_assert!(cur_inst_to_add == insts_to_add.len());
    debug_assert!(cur_block.get() == func.blocks().len() as u32);
+    debug_assert!(next_safepoint_insn_index == safepoint_insns.len());
+    debug_assert!(new_safepoint_insns.len() == safepoint_insns.len());

-    Ok((insns, target_map, orig_insn_map))
+    Ok((insns, target_map, new_to_old_insn_map, new_safepoint_insns))
}

//=============================================================================
@@ -474,7 +593,8 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
#[inline(never)]
pub(crate) fn edit_inst_stream<F: Function>(
    func: &mut F,
-    insts_to_add: Vec<InstToInsertAndPoint>,
+    safepoint_insns: &Vec<InstIx>,
+    insts_to_add: Vec<InstToInsertAndExtPoint>,
    iixs_to_nop_out: &Vec<InstIx>,
    frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
    reg_universe: &RealRegUniverse,
@@ -484,6 +604,7 @@ pub(crate) fn edit_inst_stream<F: Function>(
        Vec<F::Inst>,
        TypedIxVec<BlockIx, InstIx>,
        TypedIxVec<InstIx, InstIx>,
+        Vec<InstIx>,
    ),
    RegAllocError,
> {
@@ -496,5 +617,6 @@ pub(crate) fn edit_inst_stream<F: Function>(
        use_checker,
    )
    .map_err(|e| RegAllocError::RegChecker(e))?;
-    add_spills_reloads_and_moves(func, insts_to_add).map_err(|e| RegAllocError::Other(e))
+    add_spills_reloads_and_moves(func, safepoint_insns, insts_to_add)
+        .map_err(|e| RegAllocError::Other(e))
}

@@ -15,6 +15,7 @@ mod analysis_main;

mod analysis_control_flow;
mod analysis_data_flow;
+mod analysis_reftypes;
mod avl_tree;
mod bt_coalescing_analysis;
mod bt_commitment_map;
@@ -266,21 +267,26 @@ pub trait Function {

    /// Generate a spill instruction for insertion into the instruction
    /// sequence. The associated virtual register (whose value is being spilled)
-    /// is passed so that the client may make decisions about the instruction to
-    /// generate based on the type of value in question. Because the register
-    /// allocator will insert spill instructions at arbitrary points, the
-    /// returned instruction here must not modify the machine's condition codes.
-    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst;
+    /// is passed, if it exists, so that the client may make decisions about the
+    /// instruction to generate based on the type of value in question. Because
+    /// the register allocator will insert spill instructions at arbitrary points,
+    /// the returned instruction here must not modify the machine's condition codes.
+    fn gen_spill(
+        &self,
+        to_slot: SpillSlot,
+        from_reg: RealReg,
+        for_vreg: Option<VirtualReg>,
+    ) -> Self::Inst;

    /// Generate a reload instruction for insertion into the instruction
    /// sequence. The associated virtual register (whose value is being loaded)
-    /// is passed as well. The returned instruction must not modify the
-    /// machine's condition codes.
+    /// is passed as well, if it exists. The returned instruction must not modify
+    /// the machine's condition codes.
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst;

    /// Generate a register-to-register move for insertion into the instruction
@@ -367,6 +373,14 @@ pub struct RegAllocResult<F: Function> {
    /// call to `allocate_registers`. Creating these annotations is
    /// potentially expensive, so don't request them if you don't need them.
    pub block_annotations: Option<TypedIxVec<BlockIx, Vec<String>>>,

+    /// If stackmap support was requested: one stackmap for each of the safepoint instructions
+    /// declared. Otherwise empty.
+    pub stackmaps: Vec<Vec<SpillSlot>>,
+
+    /// If stackmap support was requested: one InstIx for each safepoint instruction declared,
+    /// indicating the corresponding location in the final instruction stream. Otherwise empty.
+    pub new_safepoint_insns: Vec<InstIx>,
}

/// A choice of register allocation algorithm to run.
@@ -444,16 +458,36 @@ impl fmt::Debug for Options {
    }
}

+/// A structure with which callers can request stackmap information.
+pub struct StackmapRequestInfo {
+    /// The register class that holds reftypes. This may only be RegClass::I32 or
+    /// RegClass::I64, and it must equal the word size of the target architecture.
+    pub reftype_class: RegClass,
+
+    /// The virtual regs that hold reftyped values. These must be provided in ascending order
+    /// of register index and be duplicate-free. They must have class `reftype_class`.
+    pub reftyped_vregs: Vec<VirtualReg>,
+
+    /// The indices of instructions for which the allocator will construct stackmaps. These
+    /// must be provided in ascending order and be duplicate-free. The specified instructions
+    /// may not be coalescable move instructions (as the allocator may remove those) and they
+    /// may not modify any register carrying a reftyped value (they may "def" or "use" them,
+    /// though). The reason is that, at a safepoint, the client's garbage collector may change
+    /// the values of all live references, so it would be meaningless for a safepoint
+    /// instruction also to attempt to do that -- we'd end up with two competing new values.
+    pub safepoint_insns: Vec<InstIx>,
+}
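
A request, then, is just these three fields, with the ordering and duplicate-freedom invariants maintained by the caller. An illustrative construction with u32 stand-ins for the crate's `VirtualReg` and `InstIx` index types:

// Shape of a stackmap request; the allocator-side checks reject
// non-ascending or duplicate entries, so we assert the same invariants here.
struct StackmapRequestInfo {
    reftype_class: &'static str, // stand-in for RegClass::I64 / RegClass::I32
    reftyped_vregs: Vec<u32>,    // ascending, duplicate-free
    safepoint_insns: Vec<u32>,   // ascending, duplicate-free, no move insns
}

fn is_strictly_ascending(xs: &[u32]) -> bool {
    xs.windows(2).all(|w| w[0] < w[1])
}

fn main() {
    let req = StackmapRequestInfo {
        reftype_class: "I64",
        reftyped_vregs: vec![0, 3, 4],
        safepoint_insns: vec![7, 19],
    };
    // The same invariants the allocator validates up front.
    assert!(is_strictly_ascending(&req.reftyped_vregs));
    assert!(is_strictly_ascending(&req.safepoint_insns));
}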

/// Allocate registers for a function's code, given a universe of real registers that we are
-/// allowed to use.
+/// allowed to use. Optionally, stackmap support may be requested.
///
/// The control flow graph must not contain any critical edges, that is, any edge coming from a
/// block with multiple successors must not flow into a block with multiple predecessors. The
/// embedder must have split critical edges before handing over the function to this function.
/// Otherwise, an error will be returned.
///
-/// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
-/// fail, returning an error.
+/// Allocation may succeed, returning a `RegAllocResult` with the new instruction sequence, or
+/// it may fail, returning an error.
///
/// Runtime options can be passed to the allocators, through the use of [Options] for options
/// common to all the backends. The choice of algorithm is done by passing a given [Algorithm]
@@ -462,6 +496,7 @@ impl fmt::Debug for Options {
pub fn allocate_registers_with_opts<F: Function>(
    func: &mut F,
    rreg_universe: &RealRegUniverse,
+    stackmap_info: Option<&StackmapRequestInfo>,
    opts: Options,
) -> Result<RegAllocResult<F>, RegAllocError> {
    info!("");
@@ -474,10 +509,69 @@ pub fn allocate_registers_with_opts<F: Function>(
            info!("  {}", s);
        }
    }
+    // If stackmap support has been requested, perform some initial sanity checks.
+    if let Some(&StackmapRequestInfo {
+        reftype_class,
+        ref reftyped_vregs,
+        ref safepoint_insns,
+    }) = stackmap_info
+    {
+        if let Algorithm::LinearScan(_) = opts.algorithm {
+            return Err(RegAllocError::Other(
+                "stackmap request: not currently available for Linear Scan".to_string(),
+            ));
+        }
+        if reftype_class != RegClass::I64 && reftype_class != RegClass::I32 {
+            return Err(RegAllocError::Other(
+                "stackmap request: invalid reftype_class".to_string(),
+            ));
+        }
+        let num_avail_vregs = func.get_num_vregs();
+        for i in 0..reftyped_vregs.len() {
+            let vreg = &reftyped_vregs[i];
+            if vreg.get_class() != reftype_class {
+                return Err(RegAllocError::Other(
+                    "stackmap request: invalid vreg class".to_string(),
+                ));
+            }
+            if vreg.get_index() >= num_avail_vregs {
+                return Err(RegAllocError::Other(
+                    "stackmap request: out of range vreg".to_string(),
+                ));
+            }
+            if i > 0 && reftyped_vregs[i - 1].get_index() >= vreg.get_index() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: non-ascending vregs".to_string(),
+                ));
+            }
+        }
+        let num_avail_insns = func.insns().len();
+        for i in 0..safepoint_insns.len() {
+            let safepoint_iix = safepoint_insns[i];
+            if safepoint_iix.get() as usize >= num_avail_insns {
+                return Err(RegAllocError::Other(
+                    "stackmap request: out of range safepoint insn".to_string(),
+                ));
+            }
+            if i > 0 && safepoint_insns[i - 1].get() >= safepoint_iix.get() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: non-ascending safepoint insns".to_string(),
+                ));
+            }
+            if func.is_move(func.get_insn(safepoint_iix)).is_some() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: safepoint insn is a move insn".to_string(),
+                ));
+            }
+        }
+        // We can't check here that reftyped regs are not changed by safepoint insns. That is
+        // done deep in the stackmap creation logic, for BT in `get_stackmap_artefacts_at`.
+    }

    let run_checker = opts.run_checker;
    let res = match &opts.algorithm {
        Algorithm::Backtracking(opts) => {
-            bt_main::alloc_main(func, rreg_universe, run_checker, opts)
+            bt_main::alloc_main(func, rreg_universe, stackmap_info, run_checker, opts)
        }
        Algorithm::LinearScan(opts) => linear_scan::run(func, rreg_universe, run_checker, opts),
    };
@@ -502,6 +596,7 @@ pub fn allocate_registers_with_opts<F: Function>(
pub fn allocate_registers<F: Function>(
    func: &mut F,
    rreg_universe: &RealRegUniverse,
+    stackmap_info: Option<&StackmapRequestInfo>,
    algorithm: AlgorithmWithDefaults,
) -> Result<RegAllocResult<F>, RegAllocError> {
    let algorithm = match algorithm {
@@ -512,7 +607,7 @@ pub fn allocate_registers<F: Function>(
        algorithm,
        ..Default::default()
    };
-    allocate_registers_with_opts(func, rreg_universe, opts)
+    allocate_registers_with_opts(func, rreg_universe, stackmap_info, opts)
}

// Facilities to snapshot regalloc inputs and reproduce them in regalloc.rs.

@@ -11,7 +11,7 @@ use std::env;
use std::fmt;

use crate::data_structures::{BlockIx, InstIx, InstPoint, Point, RealReg, RegVecsAndBounds};
-use crate::inst_stream::{add_spills_reloads_and_moves, InstToInsertAndPoint};
+use crate::inst_stream::{add_spills_reloads_and_moves, InstToInsertAndExtPoint};
use crate::{
    checker::CheckerContext, reg_maps::MentionRegUsageMapper, Function, RealRegUniverse,
    RegAllocError, RegAllocResult, RegClass, Set, SpillSlot, VirtualReg, NUM_REG_CLASSES,
@@ -625,7 +625,7 @@ fn set_registers<F: Function>(
    virtual_intervals: &Vec<VirtualInterval>,
    reg_universe: &RealRegUniverse,
    use_checker: bool,
-    memory_moves: &Vec<InstToInsertAndPoint>,
+    memory_moves: &Vec<InstToInsertAndExtPoint>,
) -> Set<RealReg> {
    info!("set_registers");

@@ -751,7 +751,7 @@ fn set_registers<F: Function>(
fn apply_registers<F: Function>(
    func: &mut F,
    virtual_intervals: &Vec<VirtualInterval>,
-    memory_moves: Vec<InstToInsertAndPoint>,
+    memory_moves: Vec<InstToInsertAndExtPoint>,
    reg_universe: &RealRegUniverse,
    num_spill_slots: u32,
    use_checker: bool,
@@ -766,8 +766,11 @@ fn apply_registers<F: Function>(
        &memory_moves,
    );

-    let (final_insns, target_map, orig_insn_map) =
-        add_spills_reloads_and_moves(func, memory_moves).map_err(|e| RegAllocError::Other(e))?;
+    let safepoint_insns = vec![];
+    let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
+        add_spills_reloads_and_moves(func, &safepoint_insns, memory_moves)
+            .map_err(|e| RegAllocError::Other(e))?;
+    assert!(new_safepoint_insns.is_empty()); // because `safepoint_insns` is also empty.

    // And now remove from the clobbered registers set, all those not available to the allocator.
    // But not removing the reserved regs, since we might have modified those.
@@ -782,9 +785,11 @@ fn apply_registers<F: Function>(
    Ok(RegAllocResult {
        insns: final_insns,
        target_map,
-        orig_insn_map,
+        orig_insn_map: new_to_old_insn_map,
        clobbered_registers,
        num_spill_slots,
        block_annotations: None,
+        stackmaps: vec![],
+        new_safepoint_insns,
    })
}

@@ -1,7 +1,7 @@
use super::{next_use, IntId, Location, RegUses, VirtualInterval};
use crate::{
    data_structures::{BlockIx, InstPoint, Point},
-    inst_stream::{InstToInsert, InstToInsertAndPoint},
+    inst_stream::{InstExtPoint, InstToInsert, InstToInsertAndExtPoint},
    sparse_set::SparseSet,
    Function, RealReg, Reg, SpillSlot, TypedIxVec, VirtualReg, Writable,
};
@@ -17,7 +17,7 @@ fn resolve_moves_in_block<F: Function>(
    reg_uses: &RegUses,
    scratches_by_rc: &[Option<RealReg>],
    spill_slot: &mut u32,
-    moves_in_blocks: &mut Vec<InstToInsertAndPoint>,
+    moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
    tmp_ordered_moves: &mut Vec<MoveOp>,
    tmp_stack: &mut Vec<MoveOp>,
) {
@@ -132,13 +132,13 @@ fn resolve_moves_in_block<F: Function>(
                "inblock fixup: {:?} spill {:?} -> {:?} at {:?}",
                interval.id, rreg, spill, at_inst
            );
-            spills_at_inst.push(InstToInsertAndPoint::new(
+            spills_at_inst.push(InstToInsertAndExtPoint::new(
                InstToInsert::Spill {
                    to_slot: spill,
                    from_reg: rreg,
-                    for_vreg: vreg,
+                    for_vreg: Some(vreg),
                },
-                at_inst,
+                InstExtPoint::from_inst_point(at_inst),
            ));
        }

@@ -324,8 +324,8 @@ fn resolve_moves_across_blocks<F: Function>(
    intervals: &Vec<VirtualInterval>,
    scratches_by_rc: &[Option<RealReg>],
    spill_slot: &mut u32,
-    moves_at_block_starts: &mut Vec<InstToInsertAndPoint>,
-    moves_at_block_ends: &mut Vec<InstToInsertAndPoint>,
+    moves_at_block_starts: &mut Vec<InstToInsertAndExtPoint>,
+    moves_at_block_ends: &mut Vec<InstToInsertAndExtPoint>,
    tmp_ordered_moves: &mut Vec<MoveOp>,
    tmp_stack: &mut Vec<MoveOp>,
) {
@@ -500,7 +500,7 @@ pub(crate) fn run<F: Function>(
    liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
    spill_slot: &mut u32,
    scratches_by_rc: &[Option<RealReg>],
-) -> Vec<InstToInsertAndPoint> {
+) -> Vec<InstToInsertAndExtPoint> {
    info!("resolve_moves");

    // Keep three lists of moves to insert:
@@ -624,14 +624,14 @@ impl MoveOp {
            MoveOperand::Stack(to) => InstToInsert::Spill {
                to_slot: to,
                from_reg: from,
-                for_vreg: self.vreg,
+                for_vreg: Some(self.vreg),
            },
        },
        MoveOperand::Stack(from) => match self.to {
            MoveOperand::Reg(to) => InstToInsert::Reload {
                to_reg: Writable::from_reg(to),
                from_slot: from,
-                for_vreg: self.vreg,
+                for_vreg: Some(self.vreg),
            },
            MoveOperand::Stack(_to) => unreachable!("stack to stack move"),
        },
@@ -749,7 +749,7 @@ fn emit_moves(
    ordered_moves: &Vec<MoveOp>,
    num_spill_slots: &mut u32,
    scratches_by_rc: &[Option<RealReg>],
-    moves_in_blocks: &mut Vec<InstToInsertAndPoint>,
+    moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
) {
    let mut spill_slot = None;
    let mut in_cycle = false;
@@ -770,9 +770,12 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(dst_reg),
                        from_slot: spill_slot.expect("should have a cycle spill slot"),
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "finishing cycle: {:?} -> {:?}",
                        spill_slot.unwrap(),
@@ -785,15 +788,21 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(scratch),
                        from_slot: spill_slot.expect("should have a cycle spill slot"),
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    let inst = InstToInsert::Spill {
                        to_slot: dst_spill,
                        from_reg: scratch,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "finishing cycle: {:?} -> {:?} -> {:?}",
                        spill_slot.unwrap(),
@@ -828,9 +837,12 @@ fn emit_moves(
                    let inst = InstToInsert::Spill {
                        to_slot: spill_slot.unwrap(),
                        from_reg: src_reg,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!("starting cycle: {:?} -> {:?}", src_reg, spill_slot.unwrap());
                }
                MoveOperand::Stack(src_spill) => {
@@ -839,15 +851,21 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(scratch),
                        from_slot: src_spill,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    let inst = InstToInsert::Spill {
                        to_slot: spill_slot.expect("should have a cycle spill slot"),
                        from_reg: scratch,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "starting cycle: {:?} -> {:?} -> {:?}",
                        src_spill,
@@ -862,7 +880,10 @@ fn emit_moves(

            // A normal move which is not part of a cycle.
            let inst = mov.gen_inst();
-            moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+            moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                inst,
+                InstExtPoint::from_inst_point(at_inst),
+            ));
            trace!("moving {:?} -> {:?}", mov.from, mov.to);
        }
    }

@@ -17,8 +17,8 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
enum IRInstKind {
-    Spill { vreg: VirtualReg },
-    Reload { vreg: VirtualReg },
+    Spill { vreg: Option<VirtualReg> },
+    Reload { vreg: Option<VirtualReg> },
    Move { vreg: VirtualReg },
    ZeroLenNop,
    UserReturn,
@@ -158,7 +158,12 @@ impl IRSnapshot {
    }

    pub fn allocate(&mut self, opts: Options) -> Result<RegAllocResult<IRFunction>, RegAllocError> {
-        allocate_registers_with_opts(&mut self.func, &self.reg_universe, opts)
+        allocate_registers_with_opts(
+            &mut self.func,
+            &self.reg_universe,
+            None, /*no stackmap request*/
+            opts,
+        )
    }
}

@@ -253,7 +258,7 @@ impl Function for IRFunction {
        &self,
        _to_slot: SpillSlot,
        from_reg: RealReg,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst {
        IRInst {
            reg_uses: vec![from_reg.to_reg()],
@@ -266,7 +271,7 @@ impl Function for IRFunction {
        &self,
        to_reg: Writable<RealReg>,
        _from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst {
        IRInst {
            reg_uses: vec![],

@@ -1 +1 @@
{"files":{"Cargo.lock":"66295ad9f17449e9ef5c16b64c9f0fca138ff07e31fb182bdd134099a7d049b4","Cargo.toml":"ddff8c2657f4fd0f83ce3b732cea03b8eb1f434fdce886fba2904cee5b0090d5","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"a50793192bdc1729a786bb456e5ad1e567c7f4b6a0a13ab0e46754e965978e8f","compare-with-main.sh":"2ddfab71ba571055292a29a36c1ede05f64ba094c78495b945d1486bf32ab6d7","examples/dump.rs":"a5944669754d1093c048a3b2e959c8c22e485a8069582d532172a8207e54dce6","examples/simple.rs":"0bbf762ca214815d81a915106efca05a9fa642a7a250c704c188258ec15d2629","src/binary_reader.rs":"d209e8cf15db30cb06e4c23980de775a59db8654aeb7a69bbe432c09f5046f76","src/lib.rs":"62c4b60aae7b7c5018caf68da31f929956f188042fa0715781e6879148f79db1","src/limits.rs":"22649a707c3f894d0381d745f744876a106cacb72d8a9a608cfa7a6f3f1e5631","src/module_resources.rs":"3a2137adb9018a5d5ebcaf274f969e650e184f77e5db62cd9b655cc6e97fdee1","src/operators_validator.rs":"4d98039d738be26670f7fb399e0738dde6caa170c09139badb62190795c78593","src/parser.rs":"061ba728cbf044456c088255c4c633d5bcc630fe9035a21168f068e498e8255c","src/primitives.rs":"c5056a6f6f444cdd4b45d2b7bb332b776088d7b5bc323e3daddeb48110025b25","src/readers/alias_section.rs":"fa64491870d97577bad7f1891aab1be7597a940bc0e2ccfef0c84847e9df3d6d","src/readers/code_section.rs":"bfdd8d5f08ef357679d7bfe6f9735ff4f08925361e0771a6b1b5112a12c62f30","src/readers/data_count_section.rs":"e711720f8205a906794dc7020a656a2ae74e1d9c3823fcdcdbd9d2f3b206c7d7","src/readers/data_section.rs":"f572e7d2589f0bccf5e97d43c1ca3aac103cbd47d139ead6b84b39b5c9d47c0b","src/readers/element_section.rs":"0193c9b7be80a0c18cba9f2d2892dba83819339aaf39a44d44003fec5328196c","src/readers/export_section.rs":"7c74f7a11406a95c162f6ad4f77aafd0b1eee309f33b69f06bea12b23925e143","src/readers/function_section.rs":"57c0479ba8d7f61908ed74e86cbc26553fdd6d2d952f032ce29385a39f82efd3","src/readers/global_section.rs":"5fa18bed0fffadcc2dbdcbaedbe4e4398992fd1ce9e611b0319333a7681082ac","src/readers/import_section.rs":"236e754867ad7829b5a95221051daff3c5df971aff9f2339fa11256f2309d209","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/instance_section.rs":"7b78bbca4b79ac7f9c42455815863a4d32dc074c5973ab1035dbfdf88b0d3c12","src/readers/linking_section.rs":"9df71f3ee5356f0d273c099212213353080001e261ca697caddf6b847fb5af09","src/readers/memory_section.rs":"83212f86cfc40d18fb392e9234c880afdf443f4af38a727ba346f9c740ef8718","src/readers/mod.rs":"d80ba76d763b06ae6e570f09a312b20006f0b83b5cd01d6baab496006fe9b7f1","src/readers/module.rs":"04f6e0bb7250f86037f30754d95a2941623272002184f372ed159db19f52dc7e","src/readers/module_code_section.rs":"aa9cf64f65e43ea5fcf9e695b7b1ba5a45b92d537f7ccef379a07162108ce9d9","src/readers/module_section.rs":"7a0c0b34478ec32030c5400df2366914c7e08ba799440a8c5ea999755c489e7f","src/readers/name_section.rs":"23b5106b17744833fb8cd61cb102e756bccb4a44594d34a5dd8b7930307ac4cb","src/readers/operators.rs":"1defc15f364775018ffe8c7f010ff83342c46659f780be4ba88c58fad7606e03","src/readers/producers_section.rs":"674f402fc4545c94487f827153871b37adab44ed5eff4070a436eb18e514023a","src/readers/reloc_section.rs":"0ef818a8b83a4542c4c29c23642436a92d3e7c37bc0248e817ed5a9d65ec38ce","src/readers/section_reader.rs":"f27f017938bb8602954298d053cd3b79d8876f9fcbbe0e1a3380051b6aa4584a","src/readers/sourcemappingurl_section.rs":"eff317f6f2b728a98a5eb68eec7e6cf222d27158d0d5597fd1c84f09b1092a50","src/readers/start_section.rs":"012fe574a5b94ea34c9d689629fb0df2f5ba4c11
c835147b39155f5a8c715e34","src/readers/table_section.rs":"e564876825a7b31df2b5dc850279b523e26dc50a08da935cc8d635a49e809951","src/readers/type_section.rs":"c2f9d7b77a1315d323bebe94ced44dc10b77c0e75c1e367bb594a402c74933ba","src/tests.rs":"5d47ec97d0a303d8cbe905f8ddcf11212a03607e0b245c9f52371464e7d08ee7","src/validator.rs":"bec65fde1d8b98d80d082067a6ccf006f35e3f06062cac97887bd5a04ef75192"},"package":"721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"}
{"files":{"Cargo.lock":"097d8d38fce861128185d43320f25aacc4f306c7a273427f0cc53460d2985c64","Cargo.toml":"38db996a0283398e7353969aa161779c7bd8160c8ced694497b12977b5983c95","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"fd8556367534c5aa04960dc8c053dd50e531d2fbe6234b811d5e3ae95649e463","compare-with-main.sh":"2ddfab71ba571055292a29a36c1ede05f64ba094c78495b945d1486bf32ab6d7","examples/simple.rs":"606072a46c5c80df29da3ecd98a989feb1289243550033cd3c3e1df6045ce8ce","src/binary_reader.rs":"b99ceb7182a2581e8834a9a6a7f351bdf0e0f9fbbbcdc4af3bfb9c506c3fc219","src/lib.rs":"414ed00613d315875c0e84750fa0086a1a67e776a9f40637f547bf99f1cc7f0f","src/limits.rs":"76226dcbb57180a399d1e5dfaed0b2516728ab09d91852e3f9aa3adebf06b3b7","src/module_resources.rs":"3a2137adb9018a5d5ebcaf274f969e650e184f77e5db62cd9b655cc6e97fdee1","src/operators_validator.rs":"5fb8f7611f5bf3115016ae81cc4e1e1e0ac4605725df0c9131f0c5d5d1a9514f","src/parser.rs":"c05a92a04020e990635728101e3f7d6653ccdeb54f2ce3615fc6c058b344cd8e","src/primitives.rs":"f93340797ff49370c13a489dc34832771156bb38541b19cb56e371e9a2e099b2","src/readers/alias_section.rs":"ef6556c3e300549958010aba9f1a0f6852c80ceddc763b4c11463d97234488b3","src/readers/code_section.rs":"ab19a5ed2a72e85e8365a8f6915ebbc10ca07b72a097322c53c36fdfb13bd57c","src/readers/data_section.rs":"d919a22ebc44c53ca434df6c1d16efc8e126e7b55ed99691d5ae73c10bfadfff","src/readers/element_section.rs":"f168a3cb02439aeaa81621525e2747d3bc4743fac2237dcdf8658b67e010ca08","src/readers/export_section.rs":"3fe296f1789e789009a79115052194a1352d947f2a8830945d6b7d9983bb8579","src/readers/function_section.rs":"5467d7a375c22a4cc225819212e616f275ef01516f185b346eae2ffbb5c53cb3","src/readers/global_section.rs":"359450911ac662503f90288798baec2415df6d3b80990a7b75794683df7894b8","src/readers/import_section.rs":"80906451f78c64d31a76772d97c96d18d208eeabaaaf82372b0567a8991795c1","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/instance_section.rs":"0f6cc9ed6bb6520493090eff5bbd6a9030ba1432206799b7dfed0b9285bafd73","src/readers/linking_section.rs":"db3091a48827a5b035e2f79f40f7ed9a7ea10acd4db6ab2bbd01e17a65a4265e","src/readers/memory_section.rs":"67d8457d3167b39fc9ae2c04f3c3e28bc10be97bbdaccd681675fb8d3eba2bd3","src/readers/mod.rs":"0fbaa4e1d7e3e68d8857fd8b3ce5c3fba886a1b460cd37398afcbf4802280b4e","src/readers/module_code_section.rs":"806eea527c84570ca6c5b8ca556f95727edae7da29e4f384b067113231b8e5f5","src/readers/module_section.rs":"6e28be8f1f4d2f1a9470ec356c46a76c2f21916143a1f0e245b96d2272d0301e","src/readers/name_section.rs":"60d4aa007cfdc16eedc1b4cb0bee560f6eebd82aaa81e9be9c844e515b16e445","src/readers/operators.rs":"3800f0321a776ddc5e8fb030828e4f2a65ebafa4b7f0808774384559ddfe49ea","src/readers/producers_section.rs":"77f93449e4bdcd61e4c79e47a685742f49cd5dac837ba002bce14120f14c9470","src/readers/reloc_section.rs":"e48e6acaa5145d6fbe1d74eb406ee59c43235faa47fbf0b07288504e60573a5e","src/readers/section_reader.rs":"e99763ce9c48994fd1e92f011a449936c4206a5c91d50fa580d003b6cc824ec5","src/readers/table_section.rs":"5d94185f68c4c7526a8836a2ebdb5b20fe754af58b68d2d0eb8fea62b7e6fe71","src/readers/type_section.rs":"87a54d238bf900aac0d0508e5c644d71b1e591df99367587feb68146a25a5a61","src/validator.rs":"8d401bdac4f8ecdd477a9fe5b979d82b49c4b988194a9a4f1253bafd15e36b11","src/validator/func.rs":"69508e0b0cfde783ca3635070496573b65a4b3ce9c39fe5afb0b6af19e346b2b"},"package":"a950e6a618f62147fd514ff445b2a0b53120d382751960797f85f058c7eda9b9"}
@@ -208,9 +208,9 @@ checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177"

[[package]]
name = "hermit-abi"
-version = "0.1.14"
+version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909"
+checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
dependencies = [
 "libc",
]
@@ -247,9 +247,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
-version = "0.2.71"
+version = "0.2.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
+checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701"

[[package]]
name = "log"
@@ -274,9 +274,9 @@ checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"

[[package]]
name = "memoffset"
-version = "0.5.4"
+version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8"
+checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f"
dependencies = [
 "autocfg",
]
@@ -470,9 +470,9 @@ dependencies = [

[[package]]
name = "syn"
-version = "1.0.33"
+version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd"
+checksum = "936cae2873c940d92e697597c5eee105fb570cd5689c695806f672883653349b"
dependencies = [
 "proc-macro2",
 "quote",
@@ -577,7 +577,7 @@ checksum = "7f7b90ea6c632dd06fd765d44542e234d5e63d9bb917ecd64d79778a13bd79ae"

[[package]]
name = "wasmparser"
-version = "0.58.0"
+version = "0.59.0"
dependencies = [
 "anyhow",
 "criterion",