Mirror of https://github.com/mozilla/gecko-dev.git

Bug 1633721, part 1 of 2: Bump Cranelift to revision 5e0268a542f612fee36d0256ed1f6a0e18dc02b3. r=bbouvier

This patch updates the vendored version of Cranelift, pulling in the reference-types support recently merged in Cranelift's PR bytecodealliance/wasmtime#1852. Usage of this update to support reftypes in SpiderMonkey on aarch64 is added in the subsequent commit.

Differential Revision: https://phabricator.services.mozilla.com/D83582

Parent: 22e60f1257
Commit: c07df47355
@@ -65,7 +65,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
 [source."https://github.com/bytecodealliance/wasmtime"]
 git = "https://github.com/bytecodealliance/wasmtime"
 replace-with = "vendored-sources"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [source."https://github.com/badboy/failure"]
 git = "https://github.com/badboy/failure"
@@ -733,22 +733,22 @@ dependencies = [
 
 [[package]]
 name = "cranelift-bforest"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
 ]
 
 [[package]]
 name = "cranelift-codegen"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "byteorder",
  "cranelift-bforest",
  "cranelift-codegen-meta",
  "cranelift-codegen-shared",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
  "log",
  "regalloc",
  "smallvec",
@@ -758,17 +758,17 @@ dependencies = [
 
 [[package]]
 name = "cranelift-codegen-meta"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen-shared",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
 ]
 
 [[package]]
 name = "cranelift-codegen-shared"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [[package]]
 name = "cranelift-entity"
@@ -777,13 +777,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9
 
 [[package]]
 name = "cranelift-entity"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [[package]]
 name = "cranelift-frontend"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen",
  "log",
@@ -793,15 +793,15 @@ dependencies = [
 
 [[package]]
 name = "cranelift-wasm"
-version = "0.65.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+version = "0.66.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=5e0268a542f612fee36d0256ed1f6a0e18dc02b3#5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 dependencies = [
  "cranelift-codegen",
- "cranelift-entity 0.65.0",
+ "cranelift-entity 0.66.0",
  "cranelift-frontend",
  "log",
  "thiserror",
- "wasmparser 0.58.0",
+ "wasmparser 0.59.0",
 ]
 
 [[package]]
@@ -3951,9 +3951,9 @@ dependencies = [
 
 [[package]]
 name = "regalloc"
-version = "0.0.26"
+version = "0.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95"
+checksum = "3598bed0895fe0f72a9e0b00ef9e3a3c8af978a8401b2f2046dec5927de6364a"
 dependencies = [
  "log",
  "rustc-hash",
@@ -5382,9 +5382,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
 
 [[package]]
 name = "wasmparser"
-version = "0.58.0"
+version = "0.59.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"
+checksum = "a950e6a618f62147fd514ff445b2a0b53120d382751960797f85f058c7eda9b9"
 
 [[package]]
 name = "wast"
@@ -74,8 +74,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f
 
 [patch.crates-io.cranelift-codegen]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
 
 [patch.crates-io.cranelift-wasm]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
+rev = "5e0268a542f612fee36d0256ed1f6a0e18dc02b3"
@@ -13,8 +13,8 @@ name = "baldrdash"
 # cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
 # you should update the following $TOP_LEVEL/Cargo.toml file: look for the
 # revision (rev) hashes of both cranelift dependencies (codegen and wasm).
-cranelift-codegen = { version = "0.65.0", default-features = false }
-cranelift-wasm = { version = "0.65.0" }
+cranelift-codegen = { version = "0.66.0", default-features = false }
+cranelift-wasm = { version = "0.66.0" }
 log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
 env_logger = "0.6"
 smallvec = "1.0"
@@ -1 +1 @@
-{"files":{"Cargo.toml":"bbdc795c9e6a82fd6555b6bb1a01d2c4d9439278e6fd801225205be69c73a2d7","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
+{"files":{"Cargo.toml":"dbbc3b62d88aec50ed9e05a6306e3aa4c5d12bf473dff48cf020843a5db31a85","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-bforest"
-version = "0.65.0"
+version = "0.66.0"
 description = "A forest of B+-trees"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://docs.rs/cranelift-bforest"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
 edition = "2018"
 
 [dependencies]
-cranelift-entity = { path = "../entity", version = "0.65.0", default-features = false }
+cranelift-entity = { path = "../entity", version = "0.66.0", default-features = false }
 
 [badges]
 maintenance = { status = "experimental" }
@@ -1 +1 @@
-{"files":{"Cargo.toml":"7c01a301a32e60cd9b0edd66f4cf8700e5de1d31607437ea756d4f8b0ae29a54","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"2b3c5105e32bce932d2628963cc5c853207e37204a6aec38caace60e52870bbe","src/isa/x86/instructions.rs":"1aee81c8bc0215fa1cad83e97a0915b24521ae61d503cd727a2406a25dd60f29","src/isa/x86/legalize.rs":"ddc834ae8f4a06ca8e3fccf7aef6a097163a2f8d258a7cbc3cc6a8b93c9c0413","src/isa/x86/mod.rs":"2b84474c2b0e272c1ebe32530c57f6b11133127c286c8f82c5ae5b6486386238","src/isa/x86/opcodes.rs":"79d42b71f78119f4ca1dc4fc90bc9efb04c6fc526e01cbe79368aa59f117266a","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e12b1a8","src/isa/x86/settings.rs":"2d3e09ca34638e19621aef2492ca6943b105e6add830bd91bddbdc85277cb680","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"5ffa26a91b344fb7014a34e0d97b4df90d604a5bd49a49a75c262591deb8e6c4","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"7800f51d97a95d572310f6c80ded59c1c84cf3ba06f9425f4205f88ac46b4e98","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
+{"files":{"Cargo.toml":"d01629d478557c181b999c1722b6284435f45f04957d7cb55735b9605136a23e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"a0f5212fa593caf66371f5ee4b15e501939a9407c4663bff6b3ba356b11ca1b4","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"5ae9e5453c3aa8b12a37e5579b602162fa9e153b444e89bb89342614b6a5ed13","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"75fa07c819b467857d1f4231123e16e519312c9a58f5e7b9c633b0768fd88b42","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"186c688dd8ac773f2b2c4c1f1cbdb7a66ca13a8ed90c03f87dfe7fdaa12c15b3","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"745ef09f4927b5334d68155fa047910ef96311feef7ec20964bb033c3419cd3c","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e12b1a8","src/isa/x86/settings.rs":"47a5e9fb3b7917cfe817d56dcc77c0470545e451e0f38a875af0531fbd9b6a58","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38e65efb654dd39e9929b8506ba94d6214d996d727815829b659fd1a3b73bdce","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"7800f51d97a95d572310f6c80ded59c1c84cf3ba06f9425f4205f88ac46b4e98","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 name = "cranelift-codegen-meta"
 authors = ["The Cranelift Project Developers"]
-version = "0.65.0"
+version = "0.66.0"
 description = "Metaprogram for cranelift-codegen code generator library"
 license = "Apache-2.0 WITH LLVM-exception"
 repository = "https://github.com/bytecodealliance/wasmtime"
@@ -12,8 +12,8 @@ edition = "2018"
 rustdoc-args = [ "--document-private-items" ]
 
 [dependencies]
-cranelift-codegen-shared = { path = "../shared", version = "0.65.0" }
-cranelift-entity = { path = "../../entity", version = "0.65.0" }
+cranelift-codegen-shared = { path = "../shared", version = "0.66.0" }
+cranelift-entity = { path = "../../entity", version = "0.66.0" }
 
 [badges]
 maintenance = { status = "experimental" }
@@ -62,7 +62,7 @@ impl InstructionGroup {
         self.instructions
             .iter()
             .find(|inst| inst.name == name)
-            .unwrap_or_else(|| panic!("unexisting instruction with name {}", name))
+            .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name))
     }
 }
 
@@ -598,7 +598,7 @@ fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionF
 
     assert_eq!(
         num_values, format.num_value_operands,
-        "inst {} doesnt' have as many value input operand as its format {} declares; you may need \
+        "inst {} doesn't have as many value input operands as its format {} declares; you may need \
          to use a different format.",
        inst_name, format.name
     );
@@ -211,6 +211,24 @@ impl TypeVar {
                     "can't double 256 lanes"
                 );
             }
+            DerivedFunc::MergeLanes => {
+                assert!(
+                    ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
+                    "can't double all integer types"
+                );
+                assert!(
+                    ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
+                    "can't double all float types"
+                );
+                assert!(
+                    ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
+                    "can't double all boolean types"
+                );
+                assert!(
+                    *ts.lanes.iter().min().unwrap() > 1,
+                    "can't halve a scalar type"
+                );
+            }
             DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
         }
 
@@ -248,6 +266,9 @@ impl TypeVar {
     pub fn split_lanes(&self) -> TypeVar {
         self.derived(DerivedFunc::SplitLanes)
     }
+    pub fn merge_lanes(&self) -> TypeVar {
+        self.derived(DerivedFunc::MergeLanes)
+    }
 
     /// Constrain the range of types this variable can assume to a subset of those in the typeset
     /// ts.
@@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc {
     HalfVector,
     DoubleVector,
     SplitLanes,
+    MergeLanes,
 }
 
 impl DerivedFunc {
@@ -367,6 +389,7 @@ impl DerivedFunc {
             DerivedFunc::HalfVector => "half_vector",
             DerivedFunc::DoubleVector => "double_vector",
             DerivedFunc::SplitLanes => "split_lanes",
+            DerivedFunc::MergeLanes => "merge_lanes",
         }
     }
 
@@ -377,6 +400,8 @@ impl DerivedFunc {
             DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
             DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
             DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
+            DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
+            DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
             _ => None,
         }
     }
@@ -462,6 +487,7 @@ impl TypeSet {
             DerivedFunc::HalfVector => self.half_vector(),
             DerivedFunc::DoubleVector => self.double_vector(),
             DerivedFunc::SplitLanes => self.half_width().double_vector(),
+            DerivedFunc::MergeLanes => self.double_width().half_vector(),
         }
     }
 
@@ -601,7 +627,8 @@ impl TypeSet {
             DerivedFunc::DoubleWidth => self.half_width(),
             DerivedFunc::HalfVector => self.double_vector(),
             DerivedFunc::DoubleVector => self.half_vector(),
-            DerivedFunc::SplitLanes => self.half_vector().double_width(),
+            DerivedFunc::SplitLanes => self.double_width().half_vector(),
+            DerivedFunc::MergeLanes => self.half_width().double_vector(),
         }
     }
 
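The image/preimage fix above is easier to see with a toy model of the lane algebra. The sketch below is illustrative only, not the meta crate's API: a shape is a (lane width, lane count) pair, total bit-width stays constant, and split_lanes/merge_lanes are mutual inverses, which is why the preimage of SplitLanes must be double_width() followed by half_vector() (the correction this hunk makes).

// Toy model of the split_lanes/merge_lanes type algebra (illustrative only).
#[derive(Clone, Copy, Debug, PartialEq)]
struct Shape {
    lane_bits: u32,
    lanes: u32,
}

impl Shape {
    /// Halve the lane width and double the lane count (total bits constant).
    fn split_lanes(self) -> Shape {
        Shape { lane_bits: self.lane_bits / 2, lanes: self.lanes * 2 }
    }
    /// Double the lane width and halve the lane count (total bits constant).
    fn merge_lanes(self) -> Shape {
        Shape { lane_bits: self.lane_bits * 2, lanes: self.lanes / 2 }
    }
}

fn main() {
    let i32x4 = Shape { lane_bits: 32, lanes: 4 };
    // i32x4 -> i16x8, and merging the result restores i32x4.
    assert_eq!(i32x4.split_lanes(), Shape { lane_bits: 16, lanes: 8 });
    assert_eq!(i32x4.split_lanes().merge_lanes(), i32x4);
    println!("ok");
}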
@@ -700,6 +700,7 @@ fn gen_isa(
 pub(crate) fn generate(
     isas: &[TargetIsa],
     transform_groups: &TransformGroups,
+    extra_legalization_groups: &[&'static str],
     filename_prefix: &str,
     out_dir: &str,
 ) -> Result<(), error::Error> {
@@ -711,8 +712,14 @@ pub(crate) fn generate(
         fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?;
     }
 
+    // Add extra legalization groups that were explicitly requested.
+    for group in extra_legalization_groups {
+        shared_group_names.insert(group);
+    }
+
+    // Generate shared legalize groups.
     let mut fmt = Formatter::new();
-    // Generate shared legalize groups.
     let mut type_sets = UniqueTable::new();
     let mut sorted_shared_group_names = Vec::from_iter(shared_group_names);
     sorted_shared_group_names.sort();
@@ -6,10 +6,10 @@ use std::fmt;
 mod arm32;
 mod arm64;
 mod riscv;
-mod x86;
+pub(crate) mod x86;
 
 /// Represents known ISA target.
-#[derive(Copy, Clone)]
+#[derive(PartialEq, Copy, Clone)]
 pub enum Isa {
     Riscv,
     X86,
@@ -1669,6 +1669,7 @@ fn define_simd(
     let ssub_sat = shared.by_name("ssub_sat");
     let store = shared.by_name("store");
     let store_complex = shared.by_name("store_complex");
+    let swiden_low = shared.by_name("swiden_low");
     let uadd_sat = shared.by_name("uadd_sat");
     let uload8x8 = shared.by_name("uload8x8");
     let uload8x8_complex = shared.by_name("uload8x8_complex");
@@ -1676,6 +1677,9 @@ fn define_simd(
     let uload16x4_complex = shared.by_name("uload16x4_complex");
     let uload32x2 = shared.by_name("uload32x2");
     let uload32x2_complex = shared.by_name("uload32x2_complex");
+    let snarrow = shared.by_name("snarrow");
+    let unarrow = shared.by_name("unarrow");
+    let uwiden_low = shared.by_name("uwiden_low");
     let ushr_imm = shared.by_name("ushr_imm");
     let usub_sat = shared.by_name("usub_sat");
     let vconst = shared.by_name("vconst");
@@ -1686,7 +1690,6 @@ fn define_simd(
     let x86_fmin = x86.by_name("x86_fmin");
     let x86_movlhps = x86.by_name("x86_movlhps");
     let x86_movsd = x86.by_name("x86_movsd");
-    let x86_packss = x86.by_name("x86_packss");
     let x86_pblendw = x86.by_name("x86_pblendw");
     let x86_pextr = x86.by_name("x86_pextr");
     let x86_pinsr = x86.by_name("x86_pinsr");
@@ -1696,6 +1699,7 @@ fn define_simd(
     let x86_pminu = x86.by_name("x86_pminu");
     let x86_pmullq = x86.by_name("x86_pmullq");
     let x86_pmuludq = x86.by_name("x86_pmuludq");
+    let x86_palignr = x86.by_name("x86_palignr");
     let x86_pshufb = x86.by_name("x86_pshufb");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_psll = x86.by_name("x86_psll");
@@ -1900,9 +1904,35 @@ fn define_simd(
             rec_fa.opcodes(low),
         );
     }
 
     // SIMD narrow/widen
     for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
-        let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
+        let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
     }
+    for (ty, opcodes, isap) in &[
+        (I16, &PACKUSWB[..], None),
+        (I32, &PACKUSDW[..], Some(use_sse41_simd)),
+    ] {
+        let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
+    }
+    for (ty, swiden_opcode, uwiden_opcode) in &[
+        (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
+        (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
+    ] {
+        let isap = Some(use_sse41_simd);
+        let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
+        let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
+    }
+    for ty in &[I8, I16, I32, I64] {
+        e.enc_both_inferred_maybe_isap(
+            x86_palignr.bind(vector(*ty, sse_vector_size)),
+            rec_fa_ib.opcodes(&PALIGNR[..]),
+            Some(use_ssse3_simd),
+        );
+    }
 
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
@@ -454,35 +454,6 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
-    let I16xN = &TypeVar::new(
-        "I16xN",
-        "A SIMD vector type containing integers 16-bits wide and up",
-        TypeSetBuilder::new()
-            .ints(16..32)
-            .simd_lanes(4..8)
-            .includes_scalars(false)
-            .build(),
-    );
-
-    let x = &Operand::new("x", I16xN);
-    let y = &Operand::new("y", I16xN);
-    let a = &Operand::new("a", &I16xN.split_lanes());
-
-    ig.push(
-        Inst::new(
-            "x86_packss",
-            r#"
-        Convert packed signed integers the lanes of ``x`` and ``y`` into half-width integers, using
-        signed saturation to handle overflows. For example, with notional i16x2 vectors, where
-        ``x = [x1, x0]`` and ``y = [y1, y0]``, this operation would result in
-        ``a = [y1', y0', x1', x0']`` (using the Intel manual's right-to-left lane ordering).
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
     let x = &Operand::new("x", FxN);
     let y = &Operand::new("y", FxN);
     let a = &Operand::new("a", FxN);
@@ -693,6 +664,21 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let c = &Operand::new("c", uimm8)
+        .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
+    ig.push(
+        Inst::new(
+            "x86_palignr",
+            r#"
+        Concatenate destination and source operands, extracting a byte-aligned result shifted to
+        the right by `c`.
+        "#,
+            &formats.ternary_imm8,
+        )
+        .operands_in(vec![x, y, c])
+        .operands_out(vec![a]),
+    );
+
     let i64_t = &TypeVar::new(
         "i64_t",
         "A scalar 64bit integer",
@@ -383,6 +383,7 @@ fn define_simd(
     let fcmp = insts.by_name("fcmp");
     let fcvt_from_uint = insts.by_name("fcvt_from_uint");
     let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
+    let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
     let fmax = insts.by_name("fmax");
     let fmin = insts.by_name("fmin");
     let fneg = insts.by_name("fneg");
@@ -405,14 +406,19 @@ fn define_simd(
     let uadd_sat = insts.by_name("uadd_sat");
     let umax = insts.by_name("umax");
     let umin = insts.by_name("umin");
+    let snarrow = insts.by_name("snarrow");
+    let swiden_high = insts.by_name("swiden_high");
+    let swiden_low = insts.by_name("swiden_low");
     let ushr_imm = insts.by_name("ushr_imm");
     let ushr = insts.by_name("ushr");
+    let uwiden_high = insts.by_name("uwiden_high");
+    let uwiden_low = insts.by_name("uwiden_low");
     let vconst = insts.by_name("vconst");
     let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
     let vselect = insts.by_name("vselect");
 
-    let x86_packss = x86_instructions.by_name("x86_packss");
+    let x86_palignr = x86_instructions.by_name("x86_palignr");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
     let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
     let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -575,7 +581,7 @@ fn define_simd(
             def!(g = raw_bitcast_i16x8_again(f)),
             def!(h = x86_psra(g, b)),
             // Re-pack the vector.
-            def!(z = x86_packss(e, h)),
+            def!(z = snarrow(e, h)),
         ],
     );
 }
@@ -785,6 +791,26 @@ fn define_simd(
     );
     }
 
+    // SIMD widen
+    for ty in &[I8, I16] {
+        let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = swiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = swiden_low(c)),
+            ],
+        );
+        let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = uwiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = uwiden_low(c)),
+            ],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
@@ -797,4 +823,5 @@ fn define_simd(
 
     narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
     narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
+    narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
 }
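The widening legalization added above rewrites swiden_high/uwiden_high as a PALIGNR byte rotation followed by the corresponding low-half widen. Below is a scalar Rust model of the signed i8x16 case; the helper names and lane values are my own illustration, not code from Cranelift.

// Model of PALIGNR(a, a, 8): with both operands equal, every byte moves down
// by 8 positions, i.e. a rotation of the 16-byte vector.
fn palignr_self_8(a: [i8; 16]) -> [i8; 16] {
    let mut out = [0i8; 16];
    for i in 0..16 {
        out[i] = a[(i + 8) % 16];
    }
    out
}

// swiden_low: sign-extend the low 8 lanes from i8 to i16.
fn swiden_low(a: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for i in 0..8 {
        out[i] = a[i] as i16;
    }
    out
}

// The legalized form: shift the high half down, then widen the low half.
fn swiden_high(a: [i8; 16]) -> [i16; 8] {
    swiden_low(palignr_self_8(a))
}

fn main() {
    let a: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, -8, -9, -10, -11, -12, -13, -14, -15];
    assert_eq!(swiden_high(a), [-8, -9, -10, -11, -12, -13, -14, -15]);
    println!("ok");
}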
@@ -14,7 +14,7 @@ mod legalize;
 mod opcodes;
 mod recipes;
 mod registers;
-mod settings;
+pub(crate) mod settings;
 
 pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     let settings = settings::define(&shared_defs.settings);
@@ -47,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
     x86_32.legalize_type(F32, x86_expand);
     x86_32.legalize_type(F64, x86_expand);
+    x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
     x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
     x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
 
@@ -60,6 +61,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
     x86_64.legalize_type(F32, x86_expand);
     x86_64.legalize_type(F64, x86_expand);
+    x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
     x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
     x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
 
@@ -314,7 +314,7 @@ pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
 /// xmm1 (SSSE3).
 pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
 
-/// Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
 /// integers in xmm1 using signed saturation (SSE2).
 pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
 
@@ -322,6 +322,14 @@ pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
 /// word integers in xmm1 using signed saturation (SSE2).
 pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
 
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
+/// integers in xmm1 using unsigned saturation (SSE2).
+pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
+
+/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed
+/// word integers in xmm1 using unsigned saturation (SSE4.1).
+pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
+
 /// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
 pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
 
@@ -346,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
 /// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
 pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
 
+/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
+/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
+pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
+
 /// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
 pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
 
@@ -465,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
 pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
 
 /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
 
 /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
@@ -477,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
 pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
 
 /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
 
 /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
@@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
 pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
     let mut settings = SettingGroupBuilder::new("x86");
 
-    settings.add_bool(
-        "use_new_backend",
-        "Whether to use the new codegen backend using the new isel",
-        false,
-    );
-
     // CPUID.01H:ECX
     let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
     let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
@@ -25,7 +25,11 @@ pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
 }
 
 /// Generates all the Rust source files used in Cranelift from the meta-language.
-pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
+pub fn generate(
+    old_backend_isas: &[isa::Isa],
+    new_backend_isas: &[isa::Isa],
+    out_dir: &str,
+) -> Result<(), error::Error> {
     // Create all the definitions:
     // - common definitions.
     let mut shared_defs = shared::define();
@@ -39,7 +43,7 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
     gen_types::generate("types.rs", &out_dir)?;
 
     // - per ISA definitions.
-    let isas = isa::define(isas, &mut shared_defs);
+    let target_isas = isa::define(old_backend_isas, &mut shared_defs);
 
     // At this point, all definitions are done.
     let all_formats = shared_defs.verify_instruction_formats();
@@ -53,9 +57,22 @@ pub fn generate(
         &out_dir,
     )?;
 
-    gen_legalizer::generate(&isas, &shared_defs.transform_groups, "legalize", &out_dir)?;
+    let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
+        // The new backend only requires the "expand" legalization group.
+        &["expand"]
+    } else {
+        &[]
+    };
 
-    for isa in isas {
+    gen_legalizer::generate(
+        &target_isas,
+        &shared_defs.transform_groups,
+        extra_legalization_groups,
+        "legalize",
+        &out_dir,
+    )?;
+
+    for isa in target_isas {
         gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
 
         gen_settings::generate(
@@ -80,5 +97,28 @@ pub fn generate(
     )?;
     }
 
+    for isa in new_backend_isas {
+        match isa {
+            isa::Isa::X86 => {
+                // If the old backend ISAs contained x86, this file has already been generated.
+                if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
+                    continue;
+                }
+
+                let settings = crate::isa::x86::settings::define(&shared_defs.settings);
+                gen_settings::generate(
+                    &settings,
+                    gen_settings::ParentGroup::Shared,
+                    "settings-x86.rs",
+                    &out_dir,
+                )?;
+            }
+            isa::Isa::Arm64 => {
+                // aarch64 doesn't have platform-specific settings.
+            }
+            isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
+        }
+    }
+
     Ok(())
 }
@@ -3883,6 +3883,126 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
     );
 
+    let I16or32xN = &TypeVar::new(
+        "I16or32xN",
+        "A SIMD vector type containing integer lanes 16 or 32 bits wide",
+        TypeSetBuilder::new()
+            .ints(16..32)
+            .simd_lanes(4..8)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", I16or32xN);
+    let y = &Operand::new("y", I16or32xN);
+    let a = &Operand::new("a", &I16or32xN.split_lanes());
+
+    ig.push(
+        Inst::new(
+            "snarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the signed maximum and minimum.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "unarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the unsigned maximum and minimum.
+
+        Note that all input lanes are considered signed: any negative lanes will overflow and be
+        replaced with the unsigned minimum, `0x00`.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let I8or16xN = &TypeVar::new(
+        "I8or16xN",
+        "A SIMD vector type containing integer lanes 8 or 16 bits wide.",
+        TypeSetBuilder::new()
+            .ints(8..16)
+            .simd_lanes(8..16)
+            .includes_scalars(false)
+            .build(),
+    );
+
+    let x = &Operand::new("x", I8or16xN);
+    let a = &Operand::new("a", &I8or16xN.merge_lanes());
+
+    ig.push(
+        Inst::new(
+            "swiden_low",
+            r#"
+        Widen the low lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "swiden_high",
+            r#"
+        Widen the high lanes of `x` using signed extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_low",
+            r#"
+        Widen the low lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uwiden_high",
+            r#"
+        Widen the high lanes of `x` using unsigned extension.
+
+        This will double the lane width and halve the number of lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",
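Per lane, the saturating behavior described in the new snarrow/unarrow docstrings reduces to a clamp before truncation. A minimal scalar sketch for i32 -> i16 lanes (my own illustration, not code from this diff):

// snarrow: saturate to the signed i16 range, then truncate.
fn snarrow_lane(x: i32) -> i16 {
    x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
}

// unarrow: inputs are signed, but saturation is to the unsigned u16 range,
// so negative lanes clamp to 0.
fn unarrow_lane(x: i32) -> u16 {
    x.clamp(0, u16::MAX as i32) as u16
}

fn main() {
    assert_eq!(snarrow_lane(70_000), i16::MAX);  // overflow -> signed max
    assert_eq!(snarrow_lane(-70_000), i16::MIN); // underflow -> signed min
    assert_eq!(unarrow_lane(-5), 0);             // negative -> unsigned minimum 0x00
    assert_eq!(unarrow_lane(70_000), u16::MAX);  // overflow -> unsigned max
    println!("ok");
}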
@@ -1 +1 @@
-{"files":{"Cargo.toml":"add374d0c310a5bdcd081ea5c9b87f3cd99e78fd9f94e1318b386da6a6d60c08","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
+{"files":{"Cargo.toml":"f091891e7b42864e1ef40c5c30724d785403727692ae66b623888367c329efcd","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-codegen-shared"
-version = "0.65.0"
+version = "0.66.0"
 description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
 license = "Apache-2.0 WITH LLVM-exception"
 repository = "https://github.com/bytecodealliance/wasmtime"
File diffs hidden because one or more lines are too long.
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-codegen"
-version = "0.65.0"
+version = "0.66.0"
 description = "Low-level code generator library"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://docs.rs/cranelift-codegen"
@@ -13,9 +13,9 @@ build = "build.rs"
 edition = "2018"
 
 [dependencies]
-cranelift-codegen-shared = { path = "./shared", version = "0.65.0" }
-cranelift-entity = { path = "../entity", version = "0.65.0" }
-cranelift-bforest = { path = "../bforest", version = "0.65.0" }
+cranelift-codegen-shared = { path = "./shared", version = "0.66.0" }
+cranelift-entity = { path = "../entity", version = "0.66.0" }
+cranelift-bforest = { path = "../bforest", version = "0.66.0" }
 hashbrown = { version = "0.7", optional = true }
 target-lexicon = "0.10"
 log = { version = "0.4.6", default-features = false }
@@ -26,15 +26,15 @@ smallvec = { version = "1.0.0" }
 thiserror = "1.0.4"
 byteorder = { version = "1.3.2", default-features = false }
 peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.2.0" }
-regalloc = "0.0.26"
+regalloc = { version = "0.0.28" }
 # It is a goal of the cranelift-codegen crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
 # accomodated in `tests`.
 
 [build-dependencies]
-cranelift-codegen-meta = { path = "meta", version = "0.65.0" }
-peepmatic = { path = "../peepmatic", optional = true, version = "0.65.0" }
+cranelift-codegen-meta = { path = "meta", version = "0.66.0" }
+peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" }
 
 [features]
 default = ["std", "unwind"]
@@ -66,7 +66,6 @@ x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel
 # Option to enable all architectures.
 all-arch = [
     "x86",
-    "x64",
     "arm32",
     "arm64",
     "riscv"
@@ -26,7 +26,15 @@ fn main() {
     let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set");
     let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set");
 
-    // Configure isa targets cfg.
+    let new_backend_isas = if env::var("CARGO_FEATURE_X64").is_ok() {
+        // The x64 (new backend for x86_64) is a bit particular: it only requires generating
+        // the shared meta code; the only ISA-specific code is for settings.
+        vec![meta::isa::Isa::X86]
+    } else {
+        Vec::new()
+    };
+
+    // Configure isa targets using the old backend.
     let isa_targets = meta::isa::Isa::all()
         .iter()
         .cloned()
@@ -36,7 +44,7 @@ fn main() {
         })
         .collect::<Vec<_>>();
 
-    let isas = if isa_targets.is_empty() {
+    let old_backend_isas = if new_backend_isas.is_empty() && isa_targets.is_empty() {
         // Try to match native target.
         let target_name = target_triple.split('-').next().unwrap();
         let isa = meta::isa_from_arch(&target_name).expect("error when identifying target");
@@ -56,14 +64,23 @@ fn main() {
         crate_dir.join("build.rs").to_str().unwrap()
     );
 
-    if let Err(err) = meta::generate(&isas, &out_dir) {
+    if let Err(err) = meta::generate(&old_backend_isas, &new_backend_isas, &out_dir) {
         eprintln!("Error: {}", err);
         process::exit(1);
     }
 
     if env::var("CRANELIFT_VERBOSE").is_ok() {
-        for isa in &isas {
-            println!("cargo:warning=Includes support for {} ISA", isa.to_string());
+        for isa in &old_backend_isas {
+            println!(
+                "cargo:warning=Includes old-backend support for {} ISA",
+                isa.to_string()
+            );
+        }
+        for isa in &new_backend_isas {
+            println!(
+                "cargo:warning=Includes new-backend support for {} ISA",
+                isa.to_string()
+            );
         }
         println!(
             "cargo:warning=Build step took {:?}.",
@@ -61,3 +61,10 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
         _ => None,
     }
 }
+
+/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the
+/// embedding, and so requires reftyped values to be enumerated with a stackmap)?
+pub fn is_safepoint(func: &Function, inst: Inst) -> bool {
+    let op = func.dfg[inst].opcode();
+    op.is_resumable_trap() || op.is_call()
+}
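A hedged sketch of how an embedder might use the new is_safepoint helper to collect the instructions that need stackmaps. The helper is taken as a closure here because its module path and visibility are not shown in this diff; only the Function/Layout iteration is standard cranelift-codegen API.

use cranelift_codegen::ir::{Function, Inst};

/// Collect all safepoint instructions, in layout order.
fn collect_safepoints(
    func: &Function,
    is_safepoint: impl Fn(&Function, Inst) -> bool,
) -> Vec<Inst> {
    let mut out = Vec::new();
    for block in func.layout.blocks() {
        for inst in func.layout.block_insts(block) {
            if is_safepoint(func, inst) {
                // Reftyped values live across this point must appear in a stackmap.
                out.push(inst);
            }
        }
    }
    out
}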
@@ -166,7 +166,7 @@ pub type ConstantOffset = u32;
 /// function body); because the function is not yet compiled when constants are inserted,
 /// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset
 /// from the beginning of the function is known (see
-/// [`relaxation.rs`](crate::binemit::relaxation)).
+/// `relaxation` in `relaxation.rs`).
 #[derive(Clone)]
 pub struct ConstantPoolEntry {
     data: ConstantData,
@@ -11,6 +11,8 @@ use crate::machinst::RelocDistance;
 use alloc::vec::Vec;
 use core::fmt;
 use core::str::FromStr;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 
 /// Function signature.
 ///
@@ -20,6 +22,7 @@ use core::str::FromStr;
 /// A signature can optionally include ISA-specific ABI information which specifies exactly how
 /// arguments and return values are passed.
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Signature {
     /// The arguments passed to the function.
     pub params: Vec<AbiParam>,
@@ -145,6 +148,7 @@ impl fmt::Display for Signature {
 /// This describes the value type being passed to or from a function along with flags that affect
 /// how the argument is passed.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct AbiParam {
     /// Type of the argument value.
     pub value_type: Type,
@@ -255,6 +259,7 @@ impl fmt::Display for AbiParam {
 /// On some architectures, small integer function arguments are extended to the width of a
 /// general-purpose register.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentExtension {
     /// No extension, high bits are indeterminate.
     None,
@@ -272,6 +277,7 @@ pub enum ArgumentExtension {
 ///
 /// The argument purpose is used to indicate any special meaning of an argument or return value.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentPurpose {
     /// A normal user program value passed to or from a function.
     Normal,
@@ -8,6 +8,8 @@ use alloc::vec::Vec;
 use core::fmt::{self, Display, Formatter};
 use core::str::FromStr;
 use core::{i32, u32};
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 
 /// Convert a type into a vector of bytes; all implementors in this file must use little-endian
 /// orderings of bytes to match WebAssembly's little-endianness.
@@ -325,6 +327,7 @@ impl FromStr for Uimm32 {
 ///
 /// This is used as an immediate value in SIMD instructions.
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct V128Imm(pub [u8; 16]);
 
 impl V128Imm {
@@ -584,6 +584,9 @@ enum OperandConstraint {
 
     /// This operand is `ctrlType.split_lanes()`.
     SplitLanes,
+
+    /// This operand is `ctrlType.merge_lanes()`.
+    MergeLanes,
 }
 
 impl OperandConstraint {
@@ -615,6 +618,11 @@ impl OperandConstraint {
                     .split_lanes()
                     .expect("invalid type for split_lanes"),
             ),
+            MergeLanes => Bound(
+                ctrl_type
+                    .merge_lanes()
+                    .expect("invalid type for merge_lanes"),
+            ),
         }
     }
 }
@@ -3,6 +3,8 @@
 use core::default::Default;
 use core::fmt::{self, Debug, Display, Formatter};
 use cranelift_codegen_shared::constants;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 use target_lexicon::{PointerWidth, Triple};
 
 /// The type of an SSA value.
@@ -21,6 +23,7 @@ use target_lexicon::{PointerWidth, Triple};
 /// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
 ///
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Type(u8);
 
 /// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector.
@@ -281,7 +284,7 @@ impl Type {
 
     /// Split the lane width in half and double the number of lanes to maintain the same bit-width.
     ///
-    /// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2.
+    /// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`.
     pub fn split_lanes(self) -> Option<Self> {
         match self.half_width() {
             Some(half_width) => half_width.by(2),
@@ -289,6 +292,17 @@ impl Type {
         }
     }
 
+    /// Merge lanes to half the number of lanes and double the lane width to maintain the same
+    /// bit-width.
+    ///
+    /// If this is a scalar type, it will return `None`.
+    pub fn merge_lanes(self) -> Option<Self> {
+        match self.double_width() {
+            Some(double_width) => double_width.half_vector(),
+            None => None,
+        }
+    }
+
     /// Index of this type, for use with hash tables etc.
     pub fn index(self) -> usize {
         usize::from(self.0)
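Concretely, the new Type::merge_lanes is the type-level inverse of split_lanes. A small usage sketch, assuming the cranelift-codegen crate at this revision (the constants come from ir::types):

use cranelift_codegen::ir::types::{I16X8, I32X4, I64};

fn main() {
    // Merging doubles the lane width and halves the lane count, keeping the
    // total bit-width at 128: i16x8 -> i32x4.
    assert_eq!(I16X8.merge_lanes(), Some(I32X4));
    // A scalar type has no lanes to merge.
    assert_eq!(I64.merge_lanes(), None);
    println!("ok");
}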
@@ -98,6 +98,7 @@ impl<'a> fmt::Display for DisplayValueLoc<'a> {
 /// - For register arguments, there is usually no difference, but if we ever add support for a
 /// register-window ISA like SPARC, register arguments would also need to be translated.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum ArgumentLoc {
     /// This argument has not been assigned to a location yet.
     Unassigned,
@@ -23,7 +23,7 @@
//! being adjusted to set up a call, we implement a "nominal SP" tracking
//! feature by which a fixup (distance between actual SP and a "nominal" SP) is
//! known at each instruction. See the documentation for
//! [MemArg::NominalSPOffset] for more on this.
//! `MemArg::NominalSPOffset` for more on this.
//!
//! The stack looks like:
//!
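Concretely, a nominal-SP reference is resolved by adding the tracked fixup to the nominal offset. A minimal sketch of that conversion (names assumed for illustration only):

    // `virtual_sp_offset` is the distance that the real SP currently sits
    // below nominal SP (e.g. after pushing outgoing call arguments), so the
    // real-SP-relative offset is the nominal offset plus the fixup.
    fn real_sp_offset(nominal_off: i64, virtual_sp_offset: i64) -> i64 {
        nominal_off + virtual_sp_offset
    }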
@@ -90,12 +90,13 @@
//! - Return v1 in memory at `[P+8]`.
//! - Return v0 in memory at `[P+16]`.

use crate::binemit::Stackmap;
use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, StackSlot};
use crate::isa;
use crate::isa::aarch64::{inst::*, lower::ty_bits};
use crate::isa::aarch64::{inst::EmitState, inst::*, lower::ty_bits};
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
@@ -372,7 +373,10 @@ pub struct AArch64ABIBody {
    clobbered: Set<Writable<RealReg>>,
    /// Total number of spillslots, from regalloc.
    spillslots: Option<usize>,
    /// Total frame size.
    /// "Total frame size", defined as the distance between FP and nominal SP.
    /// Some items are pushed below nominal SP, so the function may actually use
    /// more stack than this would otherwise imply. It is simply the initial
    /// frame/allocation size needed for stackslots and spillslots.
    total_frame_size: Option<u32>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Writable<Reg>>,
@@ -400,6 +404,8 @@ fn in_int_reg(ty: ir::Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::R64 => true,
        types::R32 => panic!("Unexpected 32-bit reference on a 64-bit platform!"),
        _ => false,
    }
}
@@ -631,14 +637,11 @@ impl AArch64ABIBody {
            rn: stack_reg(),
            rm: stack_limit,
        });
        insts.push(Inst::OneWayCondBr {
            target: BranchTarget::ResolvedOffset(8),
            // Here `Hs` == "higher or same" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Hs),
        });
        insts.push(Inst::Udf {
        insts.push(Inst::TrapIf {
            trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
    }
}
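The effect of the change, sketched as emitted instruction sequences (assembly shown for illustration only):

    ; before: conditional branch over an unconditional trap
    ;   cmp  sp, <stack_limit>
    ;   b.hs +8            ; skip the trap when SP >= limit (unsigned)
    ;   udf                ; stack overflow
    ; after: one conditional-trap pseudo-instruction
    ;   cmp  sp, <stack_limit>
    ;   trap-if lo         ; udf when SP < limit (unsigned)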
@@ -656,12 +659,12 @@ fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
            mem,
            srcloc: None,
        },
        types::B32 | types::I32 => Inst::ULoad32 {
        types::B32 | types::I32 | types::R32 => Inst::ULoad32 {
            rd: into_reg,
            mem,
            srcloc: None,
        },
        types::B64 | types::I64 => Inst::ULoad64 {
        types::B64 | types::I64 | types::R64 => Inst::ULoad64 {
            rd: into_reg,
            mem,
            srcloc: None,

@@ -692,12 +695,12 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
            mem,
            srcloc: None,
        },
        types::B32 | types::I32 => Inst::Store32 {
        types::B32 | types::I32 | types::R32 => Inst::Store32 {
            rd: from_reg,
            mem,
            srcloc: None,
        },
        types::B64 | types::I64 => Inst::Store64 {
        types::B64 | types::I64 | types::R64 => Inst::Store64 {
            rd: from_reg,
            mem,
            srcloc: None,
@@ -813,6 +816,35 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
    caller_saved
}

fn gen_sp_adjust_insts<F: FnMut(Inst)>(adj: u64, is_sub: bool, mut f: F) {
    let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };

    if let Some(imm12) = Imm12::maybe_from_u64(adj) {
        let adj_inst = Inst::AluRRImm12 {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            imm12,
        };
        f(adj_inst);
    } else {
        let tmp = writable_spilltmp_reg();
        let const_inst = Inst::LoadConst64 {
            rd: tmp,
            const_data: adj,
        };
        let adj_inst = Inst::AluRRRExtend {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            rm: tmp.to_reg(),
            extendop: ExtendOp::UXTX,
        };
        f(const_inst);
        f(adj_inst);
    }
}
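The callback parameter lets the same helper serve both the prologue (pushing into a Vec) and call lowering (emitting through the context). A usage sketch matching the prologue path below:

    // Emit "sub sp, sp, #32"; adjustments too large for an AArch64 12-bit
    // immediate go through the load-constant fallback automatically.
    let mut insts = vec![];
    gen_sp_adjust_insts(32, /* is_sub = */ true, |inst| insts.push(inst));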

impl ABIBody for AArch64ABIBody {
    type I = Inst;

@@ -1027,6 +1059,29 @@ impl ABIBody for AArch64ABIBody {
        store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty)
    }

    fn spillslots_to_stackmap(&self, slots: &[SpillSlot], state: &EmitState) -> Stackmap {
        assert!(state.virtual_sp_offset >= 0);
        trace!(
            "spillslots_to_stackmap: slots = {:?}, state = {:?}",
            slots,
            state
        );
        let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32;
        let map_words = (map_size + 7) / 8;
        let mut bits = std::iter::repeat(false)
            .take(map_words as usize)
            .collect::<Vec<bool>>();

        let first_spillslot_word =
            ((self.stackslots_size + state.virtual_sp_offset as u32) / 8) as usize;
        for &slot in slots {
            let slot = slot.get() as usize;
            bits[first_spillslot_word + slot] = true;
        }

        Stackmap::from_slice(&bits[..])
    }
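A worked example of the bitmap layout (illustrative numbers, assuming one 8-byte word per spillslot): with stackslots_size = 16, virtual_sp_offset = 0, and nominal_sp_to_fp = 48, the map covers 48 / 8 = 6 words; spillslots start at word 16 / 8 = 2, so marking spillslot 1 sets bit index 3, yielding [false, false, false, true, false, false].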

    fn gen_prologue(&mut self) -> Vec<Inst> {
        let mut insts = vec![];
        if !self.call_conv.extends_baldrdash() {
@@ -1062,6 +1117,9 @@ impl ABIBody for AArch64ABIBody {
        }
        let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.

        let mut total_sp_adjust = 0;
        let mut nominal_sp_to_real_sp = 0;

        if !self.call_conv.extends_baldrdash() {
            // Leaf functions with a zero-size stack don't need a stack check
            // if one is specified; otherwise, always insert the stack check.
@@ -1072,42 +1130,29 @@
            }
        }
        if total_stacksize > 0 {
            // sub sp, sp, #total_stacksize
            if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
                let sub_inst = Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    imm12,
                };
                insts.push(sub_inst);
            } else {
                let tmp = writable_spilltmp_reg();
                let const_inst = Inst::LoadConst64 {
                    rd: tmp,
                    const_data: total_stacksize as u64,
                };
                let sub_inst = Inst::AluRRRExtend {
                    alu_op: ALUOp::Sub64,
                    rd: writable_stack_reg(),
                    rn: stack_reg(),
                    rm: tmp.to_reg(),
                    extendop: ExtendOp::UXTX,
                };
                insts.push(const_inst);
                insts.push(sub_inst);
            }
            total_sp_adjust += total_stacksize as u64;
        }
    }

    // N.B.: "nominal SP", which we use to refer to stackslots
    // and spillslots, is *here* (the value of SP at this program point).
    // N.B.: "nominal SP", which we use to refer to stackslots and
    // spillslots, is defined to be equal to the stack pointer at this point
    // in the prologue.
    //
    // If we push any clobbers below, we emit a virtual-SP adjustment
    // meta-instruction so that the nominal-SP references behave as if SP
    // were still at this point. See documentation for
    // [crate::isa::aarch64::abi](this module) for more details on
    // stackframe layout and nominal-SP maintenance.

    if total_sp_adjust > 0 {
        // sub sp, sp, #total_stacksize
        gen_sp_adjust_insts(
            total_sp_adjust,
            /* is_sub = */ true,
            |inst| insts.push(inst),
        );
    }

    // Save clobbered registers.
    let (clobbered_int, clobbered_vec) =
        get_callee_saves(self.call_conv, self.clobbered.to_vec());
@@ -1151,10 +1196,11 @@ impl ABIBody for AArch64ABIBody {
            srcloc: None,
        });
    }
    nominal_sp_to_real_sp += clobber_size as i64;

    if clobber_size > 0 {
        insts.push(Inst::VirtualSPOffsetAdj {
            offset: clobber_size as i64,
            offset: nominal_sp_to_real_sp,
        });
    }

@@ -1248,6 +1294,10 @@ impl ABIBody for AArch64ABIBody {
            .expect("frame size not computed before prologue generation")
    }

    fn stack_args_size(&self) -> u32 {
        self.sig.stack_arg_space as u32
    }

    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
        // We allocate in terms of 8-byte slots.
        match (rc, ty) {
@@ -1258,15 +1308,42 @@ impl ABIBody for AArch64ABIBody {
        }
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
        let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty);
        self.store_spillslot(to_slot, ty, from_reg.to_reg())
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Inst {
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        ty: Option<Type>,
    ) -> Inst {
        let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty);
        self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
    }
}

/// Return a type either from an optional type hint, or if not, from the default
/// type associated with the given register's class. This is used to generate
/// loads/spills appropriately given the type of value loaded/stored (which may
/// be narrower than the spillslot). We usually have the type because the
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
/// vreg's type. However, the regalloc *can* request a spill/reload without an
/// associated vreg when needed to satisfy a safepoint (which requires all
/// ref-typed values, even those in real registers in the original vcode, to be
/// in spillslots).
fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
    match (ty, r.get_class()) {
        // If the type is provided
        (Some(t), _) => t,
        // If no type is provided, this should be a register spill for a
        // safepoint, so we only expect I64 (integer) registers.
        (None, RegClass::I64) => I64,
        _ => panic!("Unexpected register class!"),
    }
}

enum CallDest {
    ExtName(ir::ExternalName, RelocDistance),
    Reg(Reg),
@@ -1345,7 +1422,7 @@ impl AArch64ABICall {
    }
}

fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
fn adjust_stack_and_nominal_sp<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
    if amount == 0 {
        return;
    }

@@ -1359,27 +1436,9 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
        offset: sp_adjustment,
    });

    let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
    if let Some(imm12) = Imm12::maybe_from_u64(amount) {
        ctx.emit(Inst::AluRRImm12 {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            imm12,
        })
    } else {
        ctx.emit(Inst::LoadConst64 {
            rd: writable_spilltmp_reg(),
            const_data: amount,
        });
        ctx.emit(Inst::AluRRRExtend {
            alu_op,
            rd: writable_stack_reg(),
            rn: stack_reg(),
            rm: spilltmp_reg(),
            extendop: ExtendOp::UXTX,
        });
    }
    gen_sp_adjust_insts(amount, is_sub, |inst| {
        ctx.emit(inst);
    });
}

impl ABICall for AArch64ABICall {
@@ -1395,12 +1454,12 @@ impl ABICall for AArch64ABICall {

    fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
        let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
        adjust_stack(ctx, off as u64, /* is_sub = */ true)
        adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ true)
    }

    fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
        let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
        adjust_stack(ctx, off as u64, /* is_sub = */ false)
        adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ false)
    }

    fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
@@ -1455,7 +1514,7 @@ impl ABICall for AArch64ABICall {
            self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
        }
        match &self.dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call {
            &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint(Inst::Call {
                info: Box::new(CallInfo {
                    dest: name.clone(),
                    uses,

@@ -1471,7 +1530,7 @@ impl ABICall for AArch64ABICall {
                    offset: 0,
                    srcloc: self.loc,
                });
                ctx.emit(Inst::CallInd {
                ctx.emit_safepoint(Inst::CallInd {
                    info: Box::new(CallIndInfo {
                        rn: spilltmp_reg(),
                        uses,

@@ -1481,7 +1540,7 @@ impl ABICall for AArch64ABICall {
                    }),
                });
            }
            &CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
            &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::CallInd {
                info: Box::new(CallIndInfo {
                    rn: reg,
                    uses,
@@ -3,6 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]

use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
@@ -403,8 +404,8 @@ impl ShowWithRRU for MemArg {
        &MemArg::RegScaledExtended(r1, r2, ty, op) => {
            let shift = shift_for_type(ty);
            let size = match op {
                ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
                _ => InstSize::Size64,
                ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
                _ => OperandSize::Size64,
            };
            let op = op.show_rru(mb_rru);
            format!(

@@ -417,8 +418,8 @@ impl ShowWithRRU for MemArg {
        }
        &MemArg::RegExtended(r1, r2, op) => {
            let size = match op {
                ExtendOp::SXTW | ExtendOp::UXTW => InstSize::Size32,
                _ => InstSize::Size64,
                ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
                _ => OperandSize::Size64,
            };
            let op = op.show_rru(mb_rru);
            format!(
@@ -492,67 +493,150 @@ impl ShowWithRRU for BranchTarget {
}

/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
/// variants of vector instructions.
/// TODO: Create a separate type for SIMD & floating-point operands.
/// 64-bit variants of many instructions (and integer registers).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
pub enum OperandSize {
    Size32,
    Size64,
    Size128,
}

impl InstSize {
impl OperandSize {
    /// 32-bit case?
    pub fn is32(self) -> bool {
        self == InstSize::Size32
        self == OperandSize::Size32
    }
    /// 64-bit case?
    pub fn is64(self) -> bool {
        self == InstSize::Size64
        self == OperandSize::Size64
    }
    /// Convert from an `is32` boolean flag to an `InstSize`.
    pub fn from_is32(is32: bool) -> InstSize {
    /// Convert from an `is32` boolean flag to an `OperandSize`.
    pub fn from_is32(is32: bool) -> OperandSize {
        if is32 {
            InstSize::Size32
            OperandSize::Size32
        } else {
            InstSize::Size64
            OperandSize::Size64
        }
    }
    /// Convert from a needed width to the smallest size that fits.
    pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
    pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize {
        let bits: usize = bits.into();
        assert!(bits <= 128);
        assert!(bits <= 64);
        if bits <= 32 {
            InstSize::Size32
        } else if bits <= 64 {
            InstSize::Size64
            OperandSize::Size32
        } else {
            InstSize::Size128
            OperandSize::Size64
        }
    }

    /// Convert from an integer type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> InstSize {
    pub fn from_ty(ty: Type) -> OperandSize {
        Self::from_bits(ty_bits(ty))
    }

    /// Convert to I32, I64, or I128.
    pub fn to_ty(self) -> Type {
        match self {
            InstSize::Size32 => I32,
            InstSize::Size64 => I64,
            InstSize::Size128 => I128,
            OperandSize::Size32 => I32,
            OperandSize::Size64 => I64,
        }
    }

    pub fn sf_bit(&self) -> u32 {
        match self {
            InstSize::Size32 => 0,
            InstSize::Size64 => 1,
            _ => {
                panic!("Unexpected size");
            }
            OperandSize::Size32 => 0,
            OperandSize::Size64 => 1,
        }
    }
}

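A usage sketch for the renamed type (behavior read off the implementation above; not part of the patch):

    assert_eq!(OperandSize::from_ty(I32), OperandSize::Size32);
    // Anything wider than 32 bits, up to the new 64-bit cap, selects Size64.
    assert_eq!(OperandSize::from_bits(48usize), OperandSize::Size64);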
/// Type used to communicate the size of a scalar SIMD & FP operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ScalarSize {
    Size8,
    Size16,
    Size32,
    Size64,
    Size128,
}

impl ScalarSize {
    /// Convert from a needed width to the smallest size that fits.
    pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
        match bits.into().next_power_of_two() {
            8 => ScalarSize::Size8,
            16 => ScalarSize::Size16,
            32 => ScalarSize::Size32,
            64 => ScalarSize::Size64,
            128 => ScalarSize::Size128,
            _ => panic!("Unexpected type width"),
        }
    }

    /// Convert from a type into the smallest size that fits.
    pub fn from_ty(ty: Type) -> ScalarSize {
        Self::from_bits(ty_bits(ty))
    }

    /// Return the encoding bits that are used by some scalar FP instructions
    /// for a particular operand size.
    pub fn ftype(&self) -> u32 {
        match self {
            ScalarSize::Size16 => 0b11,
            ScalarSize::Size32 => 0b00,
            ScalarSize::Size64 => 0b01,
            _ => panic!("Unexpected scalar FP operand size"),
        }
    }
}

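Note that `ScalarSize::from_bits` rounds up to the next power of two, so, as a sketch of the mapping (again illustrative only):

    assert_eq!(ScalarSize::from_bits(12usize), ScalarSize::Size16);
    assert_eq!(ScalarSize::from_bits(64usize), ScalarSize::Size64);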
/// Type used to communicate the size of a vector operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VectorSize {
    Size8x8,
    Size8x16,
    Size16x4,
    Size16x8,
    Size32x2,
    Size32x4,
    Size64x2,
}

impl VectorSize {
    /// Convert from a type into a vector operand size.
    pub fn from_ty(ty: Type) -> VectorSize {
        match ty {
            F32X2 => VectorSize::Size32x2,
            F32X4 => VectorSize::Size32x4,
            F64X2 => VectorSize::Size64x2,
            I8X8 => VectorSize::Size8x8,
            I8X16 => VectorSize::Size8x16,
            I16X4 => VectorSize::Size16x4,
            I16X8 => VectorSize::Size16x8,
            I32X2 => VectorSize::Size32x2,
            I32X4 => VectorSize::Size32x4,
            I64X2 => VectorSize::Size64x2,
            _ => unimplemented!(),
        }
    }

    /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
    pub fn operand_size(&self) -> OperandSize {
        match self {
            VectorSize::Size64x2 => OperandSize::Size64,
            _ => OperandSize::Size32,
        }
    }

    /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
    pub fn lane_size(&self) -> ScalarSize {
        match self {
            VectorSize::Size8x8 => ScalarSize::Size8,
            VectorSize::Size8x16 => ScalarSize::Size8,
            VectorSize::Size16x4 => ScalarSize::Size16,
            VectorSize::Size16x8 => ScalarSize::Size16,
            VectorSize::Size32x2 => ScalarSize::Size32,
            VectorSize::Size32x4 => ScalarSize::Size32,
            VectorSize::Size64x2 => ScalarSize::Size64,
        }
    }
}

@@ -1,6 +1,6 @@
//! AArch64 ISA: binary code emission.

use crate::binemit::{CodeOffset, Reloc};
use crate::binemit::{CodeOffset, Reloc, Stackmap};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::TrapCode;
@@ -282,14 +282,13 @@ fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
        | (cond.bits() << 12)
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: InstSize) -> u32 {
    let ty_bit = if size.is32() { 0 } else { 1 };
fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
        | (ty_bit << 22)
}

fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
@@ -298,7 +297,7 @@ fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
        | (cond.invert().bits() << 12)
}

fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
@@ -334,13 +333,11 @@ fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: InstSize, rn: Reg, rm: Reg) -> u32 {
    let bits = if size.is32() {
        0b000_11110_00_1_00000_00_1000_00000_00000
    } else {
        0b000_11110_01_1_00000_00_1000_00000_00000
    };
    bits | (machreg_to_vec(rm) << 16) | (machreg_to_vec(rn) << 5)
fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
@@ -355,10 +352,11 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
fn enc_vec_rr_misc(size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
    bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
    bits | size << 22 | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
@@ -378,7 +376,37 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn:
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    virtual_sp_offset: i64,
    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
    /// program point.
    pub(crate) virtual_sp_offset: i64,
    /// Offset of FP from nominal-SP.
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stackmap for upcoming instruction, as provided to `pre_safepoint()`.
    stackmap: Option<Stackmap>,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(abi: &dyn ABIBody<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            nominal_sp_to_fp: abi.frame_size() as i64,
            stackmap: None,
        }
    }

    fn pre_safepoint(&mut self, stackmap: Stackmap) {
        self.stackmap = Some(stackmap);
    }
}

impl EmitState {
    fn take_stackmap(&mut self) -> Option<Stackmap> {
        self.stackmap.take()
    }

    fn clear_post_insn(&mut self) {
        self.stackmap = None;
    }
}
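// Lifecycle sketch: the ABI layer calls `pre_safepoint(stackmap)` just
// before a safepoint instruction is emitted; the emitter consumes the map
// with `take_stackmap()` when it emits that instruction (a call or trap),
// and `clear_post_insn()` ensures a stale map never leaks onto the next
// instruction.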

impl MachInstEmit for Inst {
@@ -533,8 +561,16 @@ impl MachInstEmit for Inst {
                    ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
                    ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
                    ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
                    ALUOp::Lsl32 => (0b0101001100, u32::from(32 - amt), u32::from(31 - amt)),
                    ALUOp::Lsl64 => (0b1101001101, u32::from(64 - amt), u32::from(63 - amt)),
                    ALUOp::Lsl32 => (
                        0b0101001100,
                        u32::from((32 - amt) % 32),
                        u32::from(31 - amt),
                    ),
                    ALUOp::Lsl64 => (
                        0b1101001101,
                        u32::from((64 - amt) % 64),
                        u32::from(63 - amt),
                    ),
                    _ => unimplemented!("{:?}", alu_op),
                };
                sink.put4(
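// Note on the new `% 32` / `% 64` wrap (an inference from the AArch64
// encoding, not stated in the patch): `lsl rd, rn, #amt` is an alias of
// `ubfm` with immr = (size - amt) % size, so the wrap keeps a zero shift
// amount encodable instead of producing an out-of-range field value.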
@@ -604,7 +640,7 @@ impl MachInstEmit for Inst {
            }

            &Inst::BitRR { op, rd, rn, .. } => {
                let size = if op.inst_size().is32() { 0b0 } else { 0b1 };
                let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
                let (op1, op2) = match op {
                    BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
                    BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
@@ -970,10 +1006,10 @@ impl MachInstEmit for Inst {
            &Inst::FpuMove128 { rd, rn } => {
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
            }
            &Inst::FpuMoveFromVec { rd, rn, idx, ty } => {
                let (imm5, shift, mask) = match ty {
                    F32 => (0b00100, 3, 0b011),
                    F64 => (0b01000, 4, 0b001),
            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
                let (imm5, shift, mask) = match size.lane_size() {
                    ScalarSize::Size32 => (0b00100, 3, 0b011),
                    ScalarSize::Size64 => (0b01000, 4, 0b001),
                    _ => unimplemented!(),
                };
                debug_assert_eq!(idx & mask, idx);
@@ -1012,6 +1048,10 @@ impl MachInstEmit for Inst {
                    FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
                    FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
                    FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
                    FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
                    FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
                    FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
                    FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
                };
                sink.put4(enc_fpurrr(top22, rd, rn, rm));
            }
@@ -1066,20 +1106,25 @@ impl MachInstEmit for Inst {
                };
                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
            }
            &Inst::VecMisc { op, rd, rn, ty } => {
                let bits_12_16 = match op {
                    VecMisc2::Not => {
                        debug_assert_eq!(128, ty_bits(ty));
                        0b00101
                    }
            &Inst::VecMisc { op, rd, rn, size } => {
                let enc_size = match size {
                    VectorSize::Size8x16 => 0b00,
                    VectorSize::Size16x8 => 0b01,
                    VectorSize::Size32x4 => 0b10,
                    VectorSize::Size64x2 => 0b11,
                    _ => unimplemented!(),
                };
                sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
                let (bits_12_16, size) = match op {
                    VecMisc2::Not => (0b00101, 0b00),
                    VecMisc2::Neg => (0b01011, enc_size),
                };
                sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn));
            }
            &Inst::VecLanes { op, rd, rn, ty } => {
                let (q, size) = match ty {
                    I8X16 => (0b1, 0b00),
                    I16X8 => (0b1, 0b01),
                    I32X4 => (0b1, 0b10),
            &Inst::VecLanes { op, rd, rn, size } => {
                let (q, size) = match size {
                    VectorSize::Size8x16 => (0b1, 0b00),
                    VectorSize::Size16x8 => (0b1, 0b01),
                    VectorSize::Size32x4 => (0b1, 0b10),
                    _ => unreachable!(),
                };
                let (u, opcode) = match op {
@@ -1088,10 +1133,10 @@ impl MachInstEmit for Inst {
                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
            }
            &Inst::FpuCmp32 { rn, rm } => {
                sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
            }
            &Inst::FpuCmp64 { rn, rm } => {
                sink.put4(enc_fcmp(InstSize::Size64, rn, rm));
                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
            }
            &Inst::FpuToInt { op, rd, rn } => {
                let top16 = match op {
@@ -1178,10 +1223,10 @@ impl MachInstEmit for Inst {
                }
            }
            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
            }
            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
                sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size64));
                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
            }
            &Inst::FpuRound { op, rd, rn } => {
                let top22 = match op {
@@ -1203,12 +1248,12 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::MovFromVec { rd, rn, idx, ty } => {
                let (q, imm5, shift, mask) = match ty {
                    I8 => (0b0, 0b00001, 1, 0b1111),
                    I16 => (0b0, 0b00010, 2, 0b0111),
                    I32 => (0b0, 0b00100, 3, 0b0011),
                    I64 => (0b1, 0b01000, 4, 0b0001),
            &Inst::MovFromVec { rd, rn, idx, size } => {
                let (q, imm5, shift, mask) = match size {
                    VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
                    VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
                    VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
                    VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
                    _ => unreachable!(),
                };
                debug_assert_eq!(idx & mask, idx);
@@ -1221,12 +1266,12 @@ impl MachInstEmit for Inst {
                        | machreg_to_gpr(rd.to_reg()),
                );
            }
            &Inst::VecDup { rd, rn, ty } => {
                let imm5 = match ty {
                    I8 => 0b00001,
                    I16 => 0b00010,
                    I32 => 0b00100,
                    I64 => 0b01000,
            &Inst::VecDup { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size8x16 => 0b00001,
                    VectorSize::Size16x8 => 0b00010,
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
@@ -1236,10 +1281,10 @@ impl MachInstEmit for Inst {
                        | machreg_to_vec(rd.to_reg()),
                );
            }
            &Inst::VecDupFromFpu { rd, rn, ty } => {
                let imm5 = match ty {
                    F32 => 0b00100,
                    F64 => 0b01000,
            &Inst::VecDupFromFpu { rd, rn, size } => {
                let imm5 = match size {
                    VectorSize::Size32x4 => 0b00100,
                    VectorSize::Size64x2 => 0b01000,
                    _ => unimplemented!(),
                };
                sink.put4(
@@ -1271,37 +1316,26 @@ impl MachInstEmit for Inst {
                rn,
                rm,
                alu_op,
                ty,
                size,
            } => {
                let enc_size = match ty {
                    I8X16 => 0b00,
                    I16X8 => 0b01,
                    I32X4 => 0b10,
                let enc_size = match size {
                    VectorSize::Size8x16 => 0b00,
                    VectorSize::Size16x8 => 0b01,
                    VectorSize::Size32x4 => 0b10,
                    VectorSize::Size64x2 => 0b11,
                    _ => 0,
                };
                let enc_size_for_fcmp = match ty {
                    F32X4 => 0b0,
                    F64X2 => 0b1,
                let enc_size_for_fcmp = match size {
                    VectorSize::Size32x4 => 0b0,
                    VectorSize::Size64x2 => 0b1,
                    _ => 0,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::SQAddScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b000011)
                    }
                    VecALUOp::SQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b010_11110_11_1, 0b001011)
                    }
                    VecALUOp::UQAddScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b000011)
                    }
                    VecALUOp::UQSubScalar => {
                        debug_assert_eq!(I64, ty);
                        (0b011_11110_11_1, 0b001011)
                    }
                    VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),

@@ -1312,27 +1346,20 @@ impl MachInstEmit for Inst {
                    VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_00_1, 0b000111)
                    }
                    VecALUOp::Bic => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_01_1, 0b000111)
                    }
                    VecALUOp::Orr => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b010_01110_10_1, 0b000111)
                    }
                    VecALUOp::Eor => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b011_01110_00_1, 0b000111)
                    }
                    VecALUOp::Bsl => {
                        debug_assert_eq!(128, ty_bits(ty));
                        (0b011_01110_01_1, 0b000111)
                    }
                    VecALUOp::And => (0b010_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
                    VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
                    VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
                    VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b010_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
                };
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
@@ -1437,6 +1464,9 @@ impl MachInstEmit for Inst {
                // Noop; this is just a placeholder for epilogues.
            }
            &Inst::Call { ref info } => {
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
                sink.put4(enc_jump26(0b100101, 0));
                if info.opcode.is_call() {

@@ -1444,6 +1474,9 @@ impl MachInstEmit for Inst {
                }
            }
            &Inst::CallInd { ref info } => {
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
                if info.opcode.is_call() {
                    sink.add_call_site(info.loc, info.opcode);
@@ -1471,12 +1504,20 @@ impl MachInstEmit for Inst {
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::OneWayCondBr { target, kind } => {
            &Inst::TrapIf { kind, trap_info } => {
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                if let Some(l) = target.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch19);
                }
                sink.put4(enc_conditional_br(target, kind));
                let label = sink.get_label();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(label),
                    kind.invert(),
                ));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
                // udf
                let trap = Inst::Udf { trap_info };
                trap.emit(sink, flags, state);
                // LABEL:
                sink.bind_label(label);
            }
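            // The TrapIf arm above replaces the old OneWayCondBr + Udf
            // pattern: it branches over the trap on the inverted condition,
            // i.e. (sketch)
            //     b.<inv-cond> LABEL   ; fall through only when trapping
            //     udf
            //   LABEL: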
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
@@ -1491,6 +1532,9 @@ impl MachInstEmit for Inst {
            &Inst::Udf { trap_info } => {
                let (srcloc, code) = trap_info;
                sink.add_trap(srcloc, code);
                if let Some(s) = state.take_stackmap() {
                    sink.add_stackmap(4, s);
                }
                sink.put4(0xd4a00000);
            }
            &Inst::Adr { rd, off } => {
@@ -1515,6 +1559,17 @@ impl MachInstEmit for Inst {
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to default when condition code from prior comparison indicates.
                let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                if let BranchTarget::Label(l) = info.default_target {
                    sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
                }
                sink.put4(br);

                // Save index in a tmp (the live range of ridx only goes to start of this
                // sequence; rtmp1 or rtmp2 may overwrite it).
                let inst = Inst::gen_move(rtmp2, ridx, I64);

@@ -1553,6 +1608,10 @@ impl MachInstEmit for Inst {
                let jt_off = sink.cur_offset();
                for &target in info.targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(
                        word_off,
@@ -1660,7 +1719,7 @@ impl MachInstEmit for Inst {
                debug!(
                    "virtual sp offset adjusted by {} -> {}",
                    offset,
                    state.virtual_sp_offset + offset
                    state.virtual_sp_offset + offset,
                );
                state.virtual_sp_offset += offset;
            }
@@ -1679,5 +1738,11 @@ impl MachInstEmit for Inst {

        let end_off = sink.cur_offset();
        debug_assert!((end_off - start_off) <= Inst::worst_case_size());

        state.clear_post_insn();
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
        self.print_with_state(mb_rru, state)
    }
}
[Diff for one file not shown because of its large size.]
@@ -4,7 +4,7 @@
#[allow(dead_code)]
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::InstSize;
use crate::isa::aarch64::inst::OperandSize;
use crate::machinst::*;

use regalloc::RealRegUniverse;

@@ -340,7 +340,7 @@ pub struct ImmLogic {
    /// `R` field: rotate amount.
    pub s: u8,
    /// Was this constructed for a 32-bit or 64-bit instruction?
    pub size: InstSize,
    pub size: OperandSize,
}

impl ImmLogic {

@@ -351,7 +351,7 @@ impl ImmLogic {
        if ty != I64 && ty != I32 {
            return None;
        }
        let inst_size = InstSize::from_ty(ty);
        let operand_size = OperandSize::from_ty(ty);

        let original_value = value;

@@ -532,7 +532,7 @@ impl ImmLogic {
            n: out_n != 0,
            r: r as u8,
            s: s as u8,
            size: inst_size,
            size: operand_size,
        })
    }

@@ -732,7 +732,7 @@ mod test {
            n: true,
            r: 0,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(1, I64)
    );

@@ -743,7 +743,7 @@ mod test {
            n: true,
            r: 63,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(2, I64)
    );

@@ -758,7 +758,7 @@ mod test {
            n: true,
            r: 61,
            s: 4,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(248, I64)
    );

@@ -771,7 +771,7 @@ mod test {
            n: true,
            r: 57,
            s: 3,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(1920, I64)
    );

@@ -782,7 +782,7 @@ mod test {
            n: true,
            r: 63,
            s: 13,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x7ffe, I64)
    );

@@ -793,7 +793,7 @@ mod test {
            n: true,
            r: 48,
            s: 1,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x30000, I64)
    );

@@ -804,7 +804,7 @@ mod test {
            n: true,
            r: 44,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x100000, I64)
    );

@@ -815,7 +815,7 @@ mod test {
            n: true,
            r: 63,
            s: 62,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
    );

@@ -826,7 +826,7 @@ mod test {
            n: false,
            r: 1,
            s: 60,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
    );

@@ -837,7 +837,7 @@ mod test {
            n: false,
            r: 1,
            s: 49,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x8181818181818181, I64)
    );

@@ -848,7 +848,7 @@ mod test {
            n: false,
            r: 10,
            s: 43,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
    );

@@ -859,7 +859,7 @@ mod test {
            n: false,
            r: 0,
            s: 0,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x100000001, I64)
    );

@@ -870,7 +870,7 @@ mod test {
            n: false,
            r: 0,
            s: 56,
            size: InstSize::Size64,
            size: OperandSize::Size64,
        }),
        ImmLogic::maybe_from_u64(0x1111111111111111, I64)
    );
[Diff for one file not shown because of its large size.]
@@ -1,7 +1,8 @@
//! AArch64 ISA definitions: registers.

use crate::ir::types::*;
use crate::isa::aarch64::inst::InstSize;
use crate::isa::aarch64::inst::OperandSize;
use crate::isa::aarch64::inst::ScalarSize;
use crate::isa::aarch64::inst::VectorSize;
use crate::machinst::*;
use crate::settings;

@@ -255,7 +256,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {

/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
/// its name at the 32-bit size.
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::I64 || !size.is32() {
        // We can't do any better.
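For example, a 64-bit integer register shown as `x3` prints as `w3` when `size.is32()` holds.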
@@ -276,23 +277,8 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Operand
    s
}

/// Show a vector register.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        return s;
    }
    let prefix = match size {
        InstSize::Size32 => "s",
        InstSize::Size64 => "d",
        InstSize::Size128 => "q",
    };
    s.replace_range(0..1, prefix);
    s
}

/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
    let mut s = reg.show_rru(mb_rru);
    if reg.get_class() != RegClass::V128 {
        // We can't do any better.

@@ -301,13 +287,13 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar

    if reg.is_real() {
        // Change (eg) "v0" into "d0".
        if reg.get_class() == RegClass::V128 && s.starts_with("v") {
            let replacement = match ty {
                I64 | F64 => "d",
                I8X16 => "b",
                I16X8 => "h",
                I32X4 => "s",
                _ => unimplemented!(),
        if s.starts_with("v") {
            let replacement = match size {
                ScalarSize::Size8 => "b",
                ScalarSize::Size16 => "h",
                ScalarSize::Size32 => "s",
                ScalarSize::Size64 => "d",
                ScalarSize::Size128 => "q",
            };
            s.replace_range(0..1, replacement);
        }

@@ -321,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar
}

/// Show a vector register.
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
    assert_eq!(RegClass::V128, reg.get_class());
    let mut s = reg.show_rru(mb_rru);

    match ty {
        F32X2 => s.push_str(".2s"),
        F32X4 => s.push_str(".4s"),
        F64X2 => s.push_str(".2d"),
        I8X8 => s.push_str(".8b"),
        I8X16 => s.push_str(".16b"),
        I16X4 => s.push_str(".4h"),
        I16X8 => s.push_str(".8h"),
        I32X2 => s.push_str(".2s"),
        I32X4 => s.push_str(".4s"),
        I64X2 => s.push_str(".2d"),
        _ => unimplemented!(),
    }
    let suffix = match size {
        VectorSize::Size8x8 => ".8b",
        VectorSize::Size8x16 => ".16b",
        VectorSize::Size16x4 => ".4h",
        VectorSize::Size16x8 => ".8h",
        VectorSize::Size32x2 => ".2s",
        VectorSize::Size32x4 => ".4s",
        VectorSize::Size64x2 => ".2d",
    };

    s.push_str(suffix);
    s
}

/// Show an indexed vector element.
pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
pub fn show_vreg_element(
    reg: Reg,
    mb_rru: Option<&RealRegUniverse>,
    idx: u8,
    size: VectorSize,
) -> String {
    assert_eq!(RegClass::V128, reg.get_class());
    let mut s = reg.show_rru(mb_rru);

    let suffix = match ty {
        I8 => "b",
        I16 => "h",
        I32 => "s",
        I64 => "d",
        F32 => "s",
        F64 => "d",
        _ => unimplemented!(),
    let suffix = match size {
        VectorSize::Size8x8 => "b",
        VectorSize::Size8x16 => "b",
        VectorSize::Size16x4 => "h",
        VectorSize::Size16x8 => "h",
        VectorSize::Size32x2 => "s",
        VectorSize::Size32x4 => "s",
        VectorSize::Size64x2 => "d",
    };

    s.push_str(&format!(".{}[{}]", suffix, idx));
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::{CodegenError, CodegenResult};
use crate::CodegenResult;

use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
    ty: Type,
    cond: Cond,
) -> CodegenResult<()> {
    match ty {
        F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
        _ => {
            return Err(CodegenError::Unsupported(format!(
                "unsupported SIMD type: {:?}",
                ty
            )));
        }
    };

    let is_float = match ty {
        F32X4 | F64X2 => true,
        _ => false,
    };
    let size = VectorSize::from_ty(ty);
    // 'Less than' operations are implemented by swapping
    // the order of operands and using the 'greater than'
    // instructions.
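    // e.g. "a < b" is emitted as "b > a" (cmgt/fcmgt with swapped operands).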
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
        rd,
        rn,
        rm,
        ty,
        size,
    });

    if cond == Cond::Ne {

@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
            op: VecMisc2::Not,
            rd,
            rn: rd.to_reg(),
            ty: I8X16,
            size,
        });
    }

@@ -829,8 +820,8 @@ pub fn ty_bits(ty: Type) -> usize {
        B1 => 1,
        B8 | I8 => 8,
        B16 | I16 => 16,
        B32 | I32 | F32 => 32,
        B64 | I64 | F64 => 64,
        B32 | I32 | F32 | R32 => 32,
        B64 | I64 | F64 | R64 => 64,
        B128 | I128 => 128,
        IFLAGS | FFLAGS => 32,
        B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64,

@@ -842,7 +833,7 @@ pub fn ty_bits(ty: Type) -> usize {

pub(crate) fn ty_is_int(ty: Type) -> bool {
    match ty {
        B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
        B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true,
        F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
        IFLAGS | FFLAGS => panic!("Unexpected flags type"),
        _ => panic!("ty_is_int() on unknown type: {:?}", ty),
@@ -988,16 +979,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [
        InsnInput {
            insn: insn,
            input: 0,
        },
        InsnInput {
            insn: insn,
            input: 1,
        },
    ];
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let ty = ctx.input_ty(insn, 0);
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
@@ -1010,16 +992,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [
        InsnInput {
            insn: insn,
            input: 0,
        },
        InsnInput {
            insn: insn,
            input: 1,
        },
    ];
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
@ -58,96 +58,117 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Iadd => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Add,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::Isub => {
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Sub,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::UaddSat | Opcode::SaddSat => {
|
||||
// We use the vector instruction set's saturating adds (UQADD /
|
||||
// SQADD), which require vector registers.
|
||||
let is_signed = op == Opcode::SaddSat;
|
||||
let narrow_mode = if is_signed {
|
||||
NarrowValueMode::SignExtend64
|
||||
} else {
|
||||
NarrowValueMode::ZeroExtend64
|
||||
};
|
||||
let alu_op = if is_signed {
|
||||
VecALUOp::SQAddScalar
|
||||
} else {
|
||||
VecALUOp::UQAddScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||
// We use the scalar SIMD & FP saturating additions and subtractions
|
||||
// (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
|
||||
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
||||
ctx.emit(Inst::VecRRR {
|
||||
rd: va,
|
||||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd,
|
||||
rn: va.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
});
|
||||
}
if ty_bits(ty) < 128 {
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
NarrowValueMode::ZeroExtend64
};
let fpu_op = match op {
Opcode::UaddSat => FPUOp2::Uqadd64,
Opcode::SaddSat => FPUOp2::Sqadd64,
Opcode::UsubSat => FPUOp2::Uqsub64,
Opcode::SsubSat => FPUOp2::Sqsub64,
_ => unreachable!(),
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::FpuRRR {
fpu_op,
rd: va,
rn: va.to_reg(),
rm: vb.to_reg(),
});
ctx.emit(Inst::MovFromVec {
rd,
rn: va.to_reg(),
idx: 0,
size: VectorSize::Size64x2,
});
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

Opcode::UsubSat | Opcode::SsubSat => {
let is_signed = op == Opcode::SsubSat;
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
NarrowValueMode::ZeroExtend64
};
let alu_op = if is_signed {
VecALUOp::SQSubScalar
} else {
VecALUOp::UQSubScalar
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::VecRRR {
rd: va,
rn: va.to_reg(),
rm: vb.to_reg(),
alu_op,
ty: I64,
});
ctx.emit(Inst::MovFromVec {
rd,
rn: va.to_reg(),
idx: 0,
ty: I64,
});
let alu_op = match op {
Opcode::UaddSat => VecALUOp::Uqadd,
Opcode::SaddSat => VecALUOp::Sqadd,
Opcode::UsubSat => VecALUOp::Uqsub,
Opcode::SsubSat => VecALUOp::Sqsub,
_ => unreachable!(),
};

ctx.emit(Inst::VecRRR {
rd,
rn,
rm,
alu_op,
size: VectorSize::from_ty(ty),
});
}
}
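The scalar SQADD/UQADD/SQSUB/UQSUB instructions this arm targets compute saturating arithmetic: on overflow the result clamps to the type's limits instead of wrapping. A minimal pure-Rust model of the 64-bit semantics (standard library only; the helper names are illustrative, not part of the patch):

// Illustrative model of the 64-bit saturating ops the lowering targets.
// Rust's standard library provides the same clamping semantics directly.
fn uqadd64(a: u64, b: u64) -> u64 {
    a.saturating_add(b) // clamps to u64::MAX instead of wrapping
}

fn sqadd64(a: i64, b: i64) -> i64 {
    a.saturating_add(b) // clamps to i64::MIN..=i64::MAX
}

fn uqsub64(a: u64, b: u64) -> u64 {
    a.saturating_sub(b) // clamps to 0
}

fn main() {
    assert_eq!(uqadd64(u64::MAX, 1), u64::MAX);
    assert_eq!(sqadd64(i64::MAX, 1), i64::MAX);
    assert_eq!(uqsub64(0, 1), 0);
}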

Opcode::Ineg => {
let rd = get_output_reg(ctx, outputs[0]);
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
if ty_bits(ty) < 128 {
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op: VecMisc2::Neg,
rd,
rn,
size: VectorSize::from_ty(ty),
});
}
}
|
||||
|
||||
Opcode::Imul => {
|
||||
|
@ -155,14 +176,24 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ra: zero_reg(),
|
||||
});
|
||||
if ty_bits(ty) < 128 {
|
||||
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ra: zero_reg(),
|
||||
});
|
||||
} else {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Mul,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
}
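The scalar multiply above is emitted as MAdd with the zero register as the addend, relying on the identity mul(rn, rm) = madd(rn, rm, 0). A sketch of that identity (illustrative, standard Rust only):

// madd computes rn * rm + ra with wrapping (modular) semantics, as on aarch64.
fn madd64(rn: u64, rm: u64, ra: u64) -> u64 {
    rn.wrapping_mul(rm).wrapping_add(ra)
}

fn main() {
    let (a, b) = (0xdead_beefu64, 7);
    // A plain multiply is just madd with a zero addend.
    assert_eq!(madd64(a, b, 0), a.wrapping_mul(b));
}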
|
||||
|
||||
Opcode::Umulhi | Opcode::Smulhi => {
|
||||
|
@ -282,14 +313,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// msub rd, rd, rm, rn ; rd = rn - rd * rm
|
||||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRRR {
|
||||
alu_op: ALUOp::MSub64,
|
||||
|
@ -300,17 +328,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
} else {
|
||||
if div_op == ALUOp::SDiv64 {
|
||||
// cbz rm, #20
|
||||
// cbnz rm, #8
|
||||
// udf ; divide by zero
|
||||
// cmn rm, 1
|
||||
// ccmp rn, 1, #nzcv, eq
|
||||
// b.vc 12
|
||||
// b.vc #8
|
||||
// udf ; signed overflow
|
||||
// udf ; divide by zero
|
||||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 20;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
||||
|
@ -319,7 +347,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// The following checks must be done in 32-bit or 64-bit, depending
// on the input type, even though the initial div instruction is
// currently always done in 64-bit.
|
||||
let size = InstSize::from_ty(ty);
|
||||
let size = OperandSize::from_ty(ty);
|
||||
// Check RHS is -1.
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
|
||||
|
@ -336,27 +364,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
nzcv: NZCV::new(false, false, false, false),
|
||||
cond: Cond::Eq,
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(12),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
});
|
||||
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(Cond::Vs),
|
||||
});
|
||||
} else {
// cbnz rm, #8
// udf ; divide by zero

// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
ctx.emit(Inst::TrapIf {
trap_info,
kind: CondBrKind::Zero(rm),
});
}

let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
ctx.emit(Inst::Udf { trap_info });
}
}
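The branch offsets above exist only to skip the udf trap when a check passes; the new TrapIf form expresses the same control flow directly. The conditions being guarded are the two cases where a hardware sdiv cannot produce a correct wasm result. A pure-Rust model of that trap logic (illustrative; the trap names match the TrapCodes used above):

#[derive(Debug, PartialEq)]
enum Trap {
    IntegerDivisionByZero,
    IntegerOverflow,
}

// Model of the checks the lowering emits around a signed 64-bit divide.
fn checked_sdiv64(rn: i64, rm: i64) -> Result<i64, Trap> {
    if rm == 0 {
        return Err(Trap::IntegerDivisionByZero);
    }
    // i64::MIN / -1 overflows: the true quotient (2^63) is unrepresentable.
    if rn == i64::MIN && rm == -1 {
        return Err(Trap::IntegerOverflow);
    }
    Ok(rn / rm)
}

fn main() {
    assert_eq!(checked_sdiv64(10, -3), Ok(-3));
    assert_eq!(checked_sdiv64(1, 0), Err(Trap::IntegerDivisionByZero));
    assert_eq!(checked_sdiv64(i64::MIN, -1), Err(Trap::IntegerOverflow));
}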
|
||||
|
||||
|
@ -398,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -442,32 +465,64 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
let ty = ty.unwrap();
|
||||
let size = InstSize::from_bits(ty_bits(ty));
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
(Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
|
||||
(Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
|
||||
(Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
|
||||
(Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
|
||||
let alu_op = match op {
|
||||
Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
|
||||
Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
|
||||
Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
if ty_bits(ty) < 128 {
|
||||
let size = OperandSize::from_bits(ty_bits(ty));
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
(Opcode::Ushr, OperandSize::Size64) => NarrowValueMode::ZeroExtend64,
|
||||
(Opcode::Ushr, OperandSize::Size32) => NarrowValueMode::ZeroExtend32,
|
||||
(Opcode::Sshr, OperandSize::Size64) => NarrowValueMode::SignExtend64,
|
||||
(Opcode::Sshr, OperandSize::Size32) => NarrowValueMode::SignExtend32,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
|
||||
let alu_op = match op {
|
||||
Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
|
||||
Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
|
||||
Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let size = VectorSize::from_ty(ty);
let (alu_op, is_right_shift) = match op {
Opcode::Ishl => (VecALUOp::Sshl, false),
Opcode::Ushr => (VecALUOp::Ushl, true),
Opcode::Sshr => (VecALUOp::Sshl, true),
_ => unreachable!(),
};

let rm = if is_right_shift {
// Right shifts are implemented with a negative left shift.
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let rn = zero_reg();
ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
tmp.to_reg()
} else {
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
};

ctx.emit(Inst::VecDup { rd, rn: rm, size });

ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm: rd.to_reg(),
size,
});
}
}
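The aarch64 vector ushl/sshl instructions take a signed per-lane shift amount, so a right shift is expressed as a left shift by the negated amount, which is what the Sub32 above computes. A per-lane model for in-range shift amounts (illustrative only):

// Model of aarch64 USHL lane semantics: a signed shift amount shifts
// left when positive and (logically) right when negative.
fn ushl_lane(value: u64, shift: i8) -> u64 {
    if shift >= 0 {
        value.wrapping_shl(shift as u32)
    } else {
        value.wrapping_shr((-shift) as u32)
    }
}

fn main() {
    assert_eq!(ushl_lane(0x80, 1), 0x100); // left shift
    assert_eq!(ushl_lane(0x80, -4), 0x8);  // ushr x, #4 == ushl x, #-4
}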
|
||||
|
||||
Opcode::Rotr | Opcode::Rotl => {
|
||||
|
@ -1107,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1134,7 +1189,26 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::IsNull | Opcode::IsInvalid => {
panic!("Reference types not supported");
// Null references are represented by the constant value 0; invalid references are
// represented by the constant value -1. See `define_reftypes()` in
// `meta/src/isa/x86/encodings.rs` to confirm.
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ctx.input_ty(insn, 0);
let (alu_op, const_value) = match op {
Opcode::IsNull => {
// cmp rn, #0
(choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64), 0)
}
Opcode::IsInvalid => {
// cmn rn, #1
(choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64), 1)
}
_ => unreachable!(),
};
let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap());
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value));
ctx.emit(Inst::CSet { rd, cond: Cond::Eq });
}
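Given the representation described in the comment above (null is 0, invalid is all ones), both opcodes reduce to comparing the reference word against a small constant and materializing a boolean. A model of the two predicates (illustrative, standard Rust only):

// Reference values modeled as raw machine words: 0 is null, -1 (all ones)
// is the invalid sentinel.
fn is_null(r: u64) -> bool {
    r == 0 // what `cmp rn, #0; cset rd, eq` computes
}

fn is_invalid(r: u64) -> bool {
    // `cmn rn, #1` sets flags for rn + 1, so EQ fires exactly when rn == -1.
    r.wrapping_add(1) == 0
}

fn main() {
    assert!(is_null(0) && !is_invalid(0));
    assert!(is_invalid(u64::MAX) && !is_null(u64::MAX));
}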
|
||||
|
||||
Opcode::Copy => {
|
||||
|
@ -1145,6 +1219,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::Bint | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => {
// If this is a Bint from a Trueif/Trueff/IsNull/IsInvalid, then the result is already
// 64-bit-zero-extended, even if the CLIF type doesn't say so, because it was produced
// by a CSet. In this case, we do not need to do any zero-extension.
let input_info = ctx.get_input(insn, 0);
let src_op = input_info
.inst
.map(|(src_inst, _)| ctx.data(src_inst).opcode());
let narrow_mode = match (src_op, op) {
(Some(Opcode::Trueif), Opcode::Bint)
| (Some(Opcode::Trueff), Opcode::Bint)
| (Some(Opcode::IsNull), Opcode::Bint)
| (Some(Opcode::IsInvalid), Opcode::Bint) => NarrowValueMode::None,
_ => NarrowValueMode::ZeroExtend64,
};

// All of these ops are simply a move from a zero-extended source.
// Here is why this works, in each case:
//

@@ -1157,7 +1246,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// - Ireduce: changing width of an integer. Smaller ints are stored
// with undefined high-order bits, so we can simply do a copy.

let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
|
@ -1203,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn,
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -1290,7 +1379,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::Trap | Opcode::ResumableTrap => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
ctx.emit(Inst::Udf { trap_info })
|
||||
ctx.emit_safepoint(Inst::Udf { trap_info });
|
||||
}
|
||||
|
||||
Opcode::Trapif | Opcode::Trapff => {
|
||||
|
@ -1324,19 +1413,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
cond
|
||||
};
|
||||
|
||||
// Branch around the break instruction with inverted cond. Go straight to lowered
|
||||
// one-target form; this is logically part of a single-in single-out template lowering.
|
||||
let cond = cond.invert();
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
|
||||
ctx.emit(Inst::Udf { trap_info })
|
||||
ctx.emit_safepoint(Inst::Udf { trap_info })
|
||||
}
|
||||
|
||||
Opcode::Safepoint => {
|
||||
panic!("safepoint support not implemented!");
|
||||
panic!("safepoint instructions not used by new backend's safepoints!");
|
||||
}
|
||||
|
||||
Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
|
||||
|
@ -1467,15 +1552,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
let idx = *imm;
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
let ty = ty.unwrap();
|
||||
|
||||
if ty_is_int(ty) {
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
|
||||
// Plain moves are faster on some processors.
|
||||
} else if idx == 0 {
|
||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
} else {
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
|
||||
}
|
||||
} else {
|
||||
unreachable!();
|
||||
|
@ -1485,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Splat => {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let inst = if ty_is_int(ty) {
|
||||
Inst::VecDup { rd, rn, ty }
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ty.unwrap());
|
||||
let inst = if ty_is_int(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, ty }
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
ctx.emit(inst);
|
||||
}
|
||||
|
@ -1507,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// cmp xm, #0
// cset xm, ne

let input_ty = ctx.input_ty(insn, 0);
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));

if op == Opcode::VanyTrue {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: tmp,
rn: rm,
rm: rm,
ty: input_ty,
size,
});
} else {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: tmp,
rn: rm,
ty: input_ty,
size,
});
};
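Umaxp (pairwise unsigned max) leaves a nonzero result iff any lane was nonzero, while uminv (unsigned min across lanes) is nonzero iff every lane was; the final compare and cset turn that into a boolean. A lane-level model (illustrative):

// Model of the two reductions on a 4-lane vector.
fn vany_true(lanes: &[u32; 4]) -> bool {
    // umaxp-style reduction: the max is nonzero iff some lane is nonzero.
    *lanes.iter().max().unwrap() != 0
}

fn vall_true(lanes: &[u32; 4]) -> bool {
    // uminv: the min is nonzero iff all lanes are nonzero.
    *lanes.iter().min().unwrap() != 0
}

fn main() {
    assert!(vany_true(&[0, 0, 3, 0]));
    assert!(!vall_true(&[0, 0, 3, 0]));
    assert!(vall_true(&[1, 2, 3, 4]));
}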
|
||||
|
||||
|
@ -1529,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
|
@ -1711,12 +1799,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
|
||||
}
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
|
||||
});
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
|
||||
|
@ -1752,12 +1839,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
|
||||
lower_constant_f32(ctx, tmp, high_bound);
|
||||
|
@ -1765,12 +1851,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
||||
});
|
||||
} else {
|
||||
// From float64.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
|
@ -1795,12 +1880,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_info,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
lower_constant_f64(ctx, tmp, high_bound);

@@ -1808,12 +1892,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
ctx.emit(Inst::Udf { trap_info });
ctx.emit(Inst::TrapIf {
trap_info,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
};
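The sequence above guards a float-to-int conversion: an ordered/unordered compare first traps NaN inputs (BadConversionToInteger), then the low and high bound compares trap out-of-range values (IntegerOverflow). A scalar model for f64 to i32 (the bounds here are illustrative; the real code selects them per input and output type):

#[derive(Debug, PartialEq)]
enum Trap {
    BadConversionToInteger,
    IntegerOverflow,
}

// Model of the range-checked fcvt: NaN traps, then the value must lie
// strictly inside (low_bound, high_bound) to be representable.
fn checked_f64_to_i32(x: f64) -> Result<i32, Trap> {
    if x.is_nan() {
        return Err(Trap::BadConversionToInteger);
    }
    // Exclusive bounds just outside i32's range.
    if !(x > -2147483649.0 && x < 2147483648.0) {
        return Err(Trap::IntegerOverflow);
    }
    Ok(x as i32)
}

fn main() {
    assert_eq!(checked_f64_to_i32(-1.9), Ok(-1)); // truncates toward zero
    assert_eq!(checked_f64_to_i32(f64::NAN), Err(Trap::BadConversionToInteger));
    assert_eq!(checked_f64_to_i32(3e9), Err(Trap::IntegerOverflow));
}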
|
||||
|
||||
// Do the conversion.
|
||||
|
@ -2050,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
|
@ -2060,7 +2144,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Packss
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
| Opcode::X86Vcvtudq2ps
|
||||
|
@ -2069,8 +2152,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||
}
|
||||
|
||||
Opcode::Iabs => unimplemented!(),
|
||||
Opcode::AvgRound => unimplemented!(),
|
||||
Opcode::Iabs => unimplemented!(),
|
||||
Opcode::Snarrow
|
||||
| Opcode::Unarrow
|
||||
| Opcode::SwidenLow
|
||||
| Opcode::SwidenHigh
|
||||
| Opcode::UwidenLow
|
||||
| Opcode::UwidenHigh => unimplemented!(),
|
||||
Opcode::TlsValue => unimplemented!(),
|
||||
}
|
||||
|
||||
|
@ -2307,7 +2396,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
|
||||
// Bounds-check and branch to default.
|
||||
// Bounds-check, leaving condition codes for JTSequence's
|
||||
// branch to default target below.
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: ALUOp::SubS32,
|
||||
|
@ -2324,14 +2414,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
}
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: default_target.clone(),
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
});
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
//
|
||||
// b.hs default
|
||||
// adr rA, jt
|
||||
// ldrsw rB, [rA, rIndex, UXTW 2]
|
||||
// add rA, rA, rB
|
||||
|
@ -2350,6 +2436,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
.skip(1)
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.collect();
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
let targets_for_term: Vec<MachLabel> = targets.to_vec();
|
||||
ctx.emit(Inst::JTSequence {
|
||||
ridx,
|
||||
|
@ -2357,7 +2444,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rtmp2,
|
||||
info: Box::new(JTSequenceInfo {
|
||||
targets: jt_targets,
|
||||
targets_for_term: targets_for_term,
|
||||
default_target,
|
||||
targets_for_term,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
|
|
@ -121,7 +121,11 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
|
|||
match triple.architecture {
|
||||
Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple),
|
||||
Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
|
||||
isa_builder!(x86, "x86", triple)
|
||||
if cfg!(feature = "x64") {
|
||||
isa_builder!(x64, "x64", triple)
|
||||
} else {
|
||||
isa_builder!(x86, "x86", triple)
|
||||
}
|
||||
}
|
||||
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
|
||||
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//! System V ABI unwind information.
|
||||
//! Windows x64 ABI unwind information.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
|
@ -57,10 +57,6 @@ pub(crate) enum UnwindCode {
|
|||
offset: u8,
|
||||
size: u32,
|
||||
},
|
||||
SetFramePointer {
|
||||
offset: u8,
|
||||
sp_offset: u8,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnwindCode {
|
||||
|
@ -69,7 +65,6 @@ impl UnwindCode {
|
|||
PushNonvolatileRegister = 0,
|
||||
LargeStackAlloc = 1,
|
||||
SmallStackAlloc = 2,
|
||||
SetFramePointer = 3,
|
||||
SaveXmm128 = 8,
|
||||
SaveXmm128Far = 9,
|
||||
}
|
||||
|
@ -85,13 +80,13 @@ impl UnwindCode {
|
|||
stack_offset,
} => {
writer.write_u8(*offset);
let stack_offset = stack_offset / 16;
if stack_offset <= core::u16::MAX as u32 {
let scaled_stack_offset = stack_offset / 16;
if scaled_stack_offset <= core::u16::MAX as u32 {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
writer.write_u16::<LittleEndian>(stack_offset as u16);
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
} else {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
writer.write_u16::<LittleEndian>(stack_offset as u16);
writer.write_u16::<LittleEndian>(*stack_offset as u16);
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
}
}
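The fix above scales the offset once into scaled_stack_offset and then uses the scaled and unscaled values consistently: the short SAVE_XMM128 form stores offset/16 in a u16, while the Far form stores the full unscaled 32-bit offset split across two u16 halves. A sketch of that choice (hypothetical helper, not the crate's API):

// Which Windows x64 unwind code a 16-byte-aligned XMM save offset needs.
enum SaveForm {
    Short(u16), // SAVE_XMM128: offset / 16 must fit in a u16
    Far(u32),   // SAVE_XMM128_FAR: full 32-bit offset, split into two u16s
}

fn save_xmm128_form(stack_offset: u32) -> SaveForm {
    let scaled = stack_offset / 16;
    if scaled <= u16::MAX as u32 {
        SaveForm::Short(scaled as u16)
    } else {
        SaveForm::Far(stack_offset)
    }
}

fn main() {
    assert!(matches!(save_xmm128_form(0x4_0000), SaveForm::Short(0x4000)));
    assert!(matches!(save_xmm128_form(0x20_0000), SaveForm::Far(_)));
}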
|
||||
|
@ -113,10 +108,6 @@ impl UnwindCode {
|
|||
writer.write_u32::<LittleEndian>(*size);
|
||||
}
|
||||
}
|
||||
Self::SetFramePointer { offset, sp_offset } => {
|
||||
writer.write_u8(*offset);
|
||||
writer.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,9 @@ use log::trace;
|
|||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
use std::mem;
|
||||
|
||||
use crate::binemit::Stackmap;
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::isa::{x64::inst::*, CallConv};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
@ -39,7 +40,7 @@ struct ABISig {
|
|||
/// Index in `args` of the stack-return-value-area argument.
|
||||
stack_ret_arg: Option<usize>,
|
||||
/// Calling convention used.
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
|
@ -64,7 +65,7 @@ pub(crate) struct X64ABIBody {
|
|||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
|
@ -92,7 +93,11 @@ fn in_vec_reg(ty: types::Type) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("int args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
|
@ -104,7 +109,11 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {}
|
||||
_ => panic!("float args only supported for SysV calling convention"),
|
||||
};
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
|
@ -118,19 +127,39 @@ fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
fn get_intreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::rax())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
fn get_fltreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||
match call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
|
||||
0 => Some(regs::xmm0()),
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV => {
|
||||
if idx == 0 {
|
||||
Some(regs::xmm0())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -146,10 +175,39 @@ fn is_callee_save_systemv(r: RealReg) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
fn is_callee_save_baldrdash(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => {
if r.get_hw_encoding() as u8 == ENC_R14 {
// r14 is the WasmTlsReg and is preserved implicitly.
false
} else {
// Defer to native for the other ones.
is_callee_save_systemv(r)
}
}
RegClass::V128 => false,
_ => unimplemented!(),
}
}

fn get_callee_saves(call_conv: &CallConv, regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
match call_conv {
CallConv::BaldrdashSystemV => regs
.into_iter()
.filter(|r| is_callee_save_baldrdash(r.to_reg()))
.collect(),
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
.into_iter()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => todo!("windows fastcall"),
CallConv::Probestack => todo!("probestack?"),
}
}
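The prologue only needs to save the clobbered registers that the calling convention marks callee-saved; Baldrdash additionally excludes r14 (the WasmTlsReg), which is preserved implicitly, per the comment above. A simplified model of the SysV filter (register names as strings, purely illustrative):

// SysV x86-64 callee-saved integer registers, by name.
fn is_callee_save_sysv(reg: &str) -> bool {
    matches!(reg, "rbx" | "rbp" | "r12" | "r13" | "r14" | "r15")
}

fn main() {
    // Only the clobbered regs that are callee-saved need prologue saves.
    let clobbered = ["rax", "rbx", "r12", "r11"];
    let to_save: Vec<_> = clobbered.iter().filter(|r| is_callee_save_sysv(r)).collect();
    assert_eq!(to_save, [&"rbx", &"r12"]);
}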
|
||||
|
||||
impl X64ABIBody {
|
||||
|
@ -159,7 +217,7 @@ impl X64ABIBody {
|
|||
|
||||
let call_conv = f.signature.call_conv;
|
||||
debug_assert!(
|
||||
call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
call_conv == CallConv::SystemV || call_conv.extends_baldrdash(),
|
||||
"unsupported or unimplemented calling convention {}",
|
||||
call_conv
|
||||
);
|
||||
|
@ -194,7 +252,6 @@ impl X64ABIBody {
|
|||
if self.call_conv.extends_baldrdash() {
|
||||
let num_words = self.flags.baldrdash_prologue_words() as i64;
|
||||
debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
|
||||
debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
|
||||
num_words * 8
|
||||
} else {
|
||||
16 // frame pointer + return address.
|
||||
|
@ -268,7 +325,18 @@ impl ABIBody for X64ABIBody {
|
|||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
if let Some(i) = self.sig.stack_ret_arg {
|
||||
let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
|
||||
trace!(
|
||||
"gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
|
||||
inst,
|
||||
self.ret_area_ptr.unwrap().to_reg()
|
||||
);
|
||||
Some(inst)
|
||||
} else {
|
||||
trace!("gen_retval_area_setup: not needed");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
|
@ -294,15 +362,17 @@ impl ABIBody for X64ABIBody {
|
|||
(ArgumentExtension::Uext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movzx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
ret.push(Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::reg(r.to_reg()),
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
dest_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)),
|
||||
|
@ -326,6 +396,7 @@ impl ABIBody for X64ABIBody {
|
|||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
(ArgumentExtension::Sext, Some(ext_mode)) => {
|
||||
|
@ -333,6 +404,7 @@ impl ABIBody for X64ABIBody {
|
|||
ext_mode,
|
||||
RegMem::reg(from_reg.to_reg()),
|
||||
from_reg,
|
||||
/* infallible load */ None,
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
|
@ -387,12 +459,36 @@ impl ABIBody for X64ABIBody {
|
|||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!("load_spillslot")
|
||||
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
|
||||
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let spill_off = islot * 8;
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
load_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
into_reg,
|
||||
ty,
|
||||
)
|
||||
}
|
||||
|
||||
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_spillslot")
|
||||
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst {
|
||||
// Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let spill_off = islot * 8;
|
||||
let sp_off = self.stack_slots_size as i64 + spill_off;
|
||||
debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
|
||||
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
store_stack(
|
||||
SyntheticAmode::nominal_sp_offset(sp_off as u32),
|
||||
from_reg,
|
||||
ty,
|
||||
)
|
||||
}
|
||||
|
||||
fn spillslots_to_stackmap(&self, _slots: &[SpillSlot], _state: &EmitState) -> Stackmap {
|
||||
unimplemented!("spillslots_to_stackmap")
|
||||
}
|
||||
|
||||
fn gen_prologue(&mut self) -> Vec<Inst> {
|
||||
|
@ -412,7 +508,7 @@ impl ABIBody for X64ABIBody {
|
|||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
let callee_saved_used: usize = clobbered
|
||||
.iter()
|
||||
.map(|reg| match reg.to_reg().get_class() {
|
||||
|
@ -456,7 +552,7 @@ impl ABIBody for X64ABIBody {
|
|||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
|
@ -486,7 +582,7 @@ impl ABIBody for X64ABIBody {
|
|||
// Undo what we did in the prologue.
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
|
||||
for wreg in clobbered.into_iter().rev() {
|
||||
let rreg = wreg.to_reg();
|
||||
match rreg.get_class() {
|
||||
|
@ -533,6 +629,10 @@ impl ABIBody for X64ABIBody {
|
|||
.expect("frame size not computed before prologue generation") as u32
|
||||
}
|
||||
|
||||
fn stack_args_size(&self) -> u32 {
|
||||
unimplemented!("I need to be computed!")
|
||||
}
|
||||
|
||||
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
|
@ -543,16 +643,43 @@ impl ABIBody for X64ABIBody {
|
|||
}
|
||||
}
|
||||
|
||||
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
|
||||
let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty);
|
||||
self.store_spillslot(to_slot, ty, from_reg.to_reg())
|
||||
}
|
||||
|
||||
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
fn gen_reload(
|
||||
&self,
|
||||
to_reg: Writable<RealReg>,
|
||||
from_slot: SpillSlot,
|
||||
ty: Option<Type>,
|
||||
) -> Inst {
|
||||
let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty);
|
||||
self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
|
||||
}
|
||||
}
|
||||
|
||||
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
/// Return a type either from an optional type hint, or if not, from the default
|
||||
/// type associated with the given register's class. This is used to generate
|
||||
/// loads/spills appropriately given the type of value loaded/stored (which may
|
||||
/// be narrower than the spillslot). We usually have the type because the
|
||||
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
|
||||
/// vreg's type. However, the regalloc *can* request a spill/reload without an
|
||||
/// associated vreg when needed to satisfy a safepoint (which requires all
|
||||
/// ref-typed values, even those in real registers in the original vcode, to be
|
||||
/// in spillslots).
|
||||
fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
|
||||
match (ty, r.get_class()) {
|
||||
// If the type is provided
|
||||
(Some(t), _) => t,
|
||||
// If no type is provided, this should be a register spill for a
|
||||
// safepoint, so we only expect I64 (integer) registers.
|
||||
(None, RegClass::I64) => I64,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_caller_saves(call_conv: CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = Vec::new();
|
||||
|
||||
// Systemv calling convention:
|
||||
|
@ -567,6 +694,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
|||
caller_saved.push(Writable::from_reg(regs::r10()));
|
||||
caller_saved.push(Writable::from_reg(regs::r11()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
caller_saved.push(Writable::from_reg(regs::r12()));
|
||||
caller_saved.push(Writable::from_reg(regs::r13()));
|
||||
// Not r14; implicitly preserved in the entry.
|
||||
caller_saved.push(Writable::from_reg(regs::r15()));
|
||||
caller_saved.push(Writable::from_reg(regs::rbx()));
|
||||
}
|
||||
|
||||
// - XMM: all the registers!
|
||||
caller_saved.push(Writable::from_reg(regs::xmm0()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm1()));
|
||||
|
@ -585,10 +720,6 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
|
|||
caller_saved.push(Writable::from_reg(regs::xmm14()));
|
||||
caller_saved.push(Writable::from_reg(regs::xmm15()));
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
todo!("add the baldrdash caller saved")
|
||||
}
|
||||
|
||||
caller_saved
|
||||
}
|
||||
|
||||
|
@ -615,7 +746,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
|
|||
}
|
||||
|
||||
/// Try to fill a Baldrdash register, returning it if it was found.
|
||||
fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
|
||||
if call_conv.extends_baldrdash() {
|
||||
match ¶m.purpose {
|
||||
&ir::ArgumentPurpose::VMContext => {
|
||||
|
@ -649,16 +780,13 @@ enum ArgsOrRets {
|
|||
/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the
|
||||
/// index of the extra synthetic arg that was added.
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
call_conv: CallConv,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let is_baldrdash = call_conv.extends_baldrdash();
|
||||
|
||||
// XXX assume SystemV at the moment.
|
||||
debug_assert!(!is_baldrdash, "baldrdash nyi");
|
||||
|
||||
let mut next_gpr = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
|
@ -692,8 +820,8 @@ fn compute_arg_locs(
|
|||
|
||||
let (next_reg, candidate) = if intreg {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr),
|
||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::I64)
|
||||
|
@ -701,8 +829,8 @@ fn compute_arg_locs(
|
|||
(&mut next_gpr, candidate)
|
||||
} else {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_vreg),
|
||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
|
||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg),
|
||||
};
|
||||
debug_assert!(candidate
|
||||
.map(|r| r.get_class() == RegClass::V128)
|
||||
|
@ -735,7 +863,7 @@ fn compute_arg_locs(
|
|||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
|
||||
ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64));
|
||||
} else {
|
||||
ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64));
|
||||
|
@ -828,7 +956,7 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
|
|||
}
|
||||
}
|
||||
|
||||
fn load_stack(mem: Amode, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
let ext_mode = match ty {
|
||||
types::B1 | types::B8 | types::I8 => Some(ExtMode::BQ),
|
||||
types::B16 | types::I16 => Some(ExtMode::WQ),
|
||||
|
@ -839,13 +967,19 @@ fn load_stack(mem: Amode, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
|||
_ => unimplemented!("load_stack({})", ty),
|
||||
};
|
||||
|
||||
let mem = mem.into();
|
||||
match ext_mode {
|
||||
Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg),
|
||||
None => Inst::mov64_m_r(mem, into_reg),
|
||||
Some(ext_mode) => Inst::movsx_rm_r(
|
||||
ext_mode,
|
||||
RegMem::mem(mem),
|
||||
into_reg,
|
||||
/* infallible load */ None,
|
||||
),
|
||||
None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
|
||||
}
|
||||
}
|
||||
|
||||
fn store_stack(mem: Amode, from_reg: Reg, ty: Type) -> Inst {
|
||||
fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst {
|
||||
let (is_int, size) = match ty {
|
||||
types::B1 | types::B8 | types::I8 => (true, 1),
|
||||
types::B16 | types::I16 => (true, 2),
|
||||
|
@ -855,8 +989,9 @@ fn store_stack(mem: Amode, from_reg: Reg, ty: Type) -> Inst {
|
|||
types::F64 => (false, 8),
|
||||
_ => unimplemented!("store_stack({})", ty),
|
||||
};
|
||||
let mem = mem.into();
|
||||
if is_int {
|
||||
Inst::mov_r_m(size, from_reg, mem)
|
||||
Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
|
||||
} else {
|
||||
unimplemented!("f32/f64 store_stack");
|
||||
}
|
||||
|
|
|
@ -27,6 +27,10 @@ pub enum Amode {
|
|||
index: Reg,
|
||||
shift: u8, /* 0 .. 3 only */
|
||||
},
|
||||
|
||||
/// sign-extend-32-to-64(Immediate) + RIP (instruction pointer).
|
||||
/// To wit: not supported in 32-bits mode.
|
||||
RipRelative { target: BranchTarget },
|
||||
}
|
||||
|
||||
impl Amode {
|
||||
|
@ -47,6 +51,10 @@ impl Amode {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn rip_relative(target: BranchTarget) -> Self {
|
||||
Self::RipRelative { target }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
|
@ -57,6 +65,9 @@ impl Amode {
|
|||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
Amode::RipRelative { .. } => {
|
||||
// RIP isn't involved in regalloc.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -79,6 +90,13 @@ impl ShowWithRRU for Amode {
|
|||
index.show_rru(mb_rru),
|
||||
1 << shift
|
||||
),
|
||||
Amode::RipRelative { ref target } => format!(
|
||||
"{}(%rip)",
|
||||
match target {
|
||||
BranchTarget::Label(label) => format!("label{}", label.get()),
|
||||
BranchTarget::ResolvedOffset(offset) => offset.to_string(),
|
||||
}
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -181,7 +199,7 @@ impl RegMemImm {
|
|||
match self {
|
||||
Self::Reg { reg } => collector.add_use(*reg),
|
||||
Self::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
Self::Imm { simm32: _ } => {}
|
||||
Self::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -216,12 +234,11 @@ impl RegMem {
|
|||
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
|
||||
Self::Mem { addr: addr.into() }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RegMem::Reg { reg } => collector.add_use(*reg),
|
||||
RegMem::Mem { addr } => addr.get_regs_as_uses(collector),
|
||||
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +251,7 @@ impl ShowWithRRU for RegMem {
|
|||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RegMem::Mem { addr } => addr.show_rru(mb_rru),
|
||||
RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -265,9 +282,32 @@ impl fmt::Debug for AluRmiROpcode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for AluRmiROpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for AluRmiROpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum UnaryRmROpcode {
|
||||
/// Bit-scan reverse.
|
||||
Bsr,
|
||||
/// Bit-scan forward.
|
||||
Bsf,
|
||||
}
|
||||
|
||||
impl fmt::Debug for UnaryRmROpcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
|
||||
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for UnaryRmROpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -428,9 +468,9 @@ impl fmt::Debug for SseOpcode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for SseOpcode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for SseOpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -479,34 +519,65 @@ impl fmt::Debug for ExtMode {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for ExtMode {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for ExtMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
#[derive(Clone)]
pub enum ShiftKind {
Left,
RightZ,
RightS,
ShiftLeft,
/// Inserts zeros in the most significant bits.
ShiftRightLogical,
/// Replicates the sign bit in the most significant bits.
ShiftRightArithmetic,
RotateLeft,
RotateRight,
}

impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
ShiftKind::Left => "shl",
ShiftKind::RightZ => "shr",
ShiftKind::RightS => "sar",
ShiftKind::ShiftLeft => "shl",
ShiftKind::ShiftRightLogical => "shr",
ShiftKind::ShiftRightArithmetic => "sar",
ShiftKind::RotateLeft => "rol",
ShiftKind::RotateRight => "ror",
};
write!(fmt, "{}", name)
}
}

impl ToString for ShiftKind {
fn to_string(&self) -> String {
format!("{:?}", self)
impl fmt::Display for ShiftKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}
|
||||
|
||||
/// Which kind of division or remainder instruction this is.
|
||||
#[derive(Clone)]
|
||||
pub enum DivOrRemKind {
|
||||
SignedDiv,
|
||||
UnsignedDiv,
|
||||
SignedRem,
|
||||
UnsignedRem,
|
||||
}
|
||||
|
||||
impl DivOrRemKind {
|
||||
pub(crate) fn is_signed(&self) -> bool {
|
||||
match self {
|
||||
DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_div(&self) -> bool {
|
||||
match self {
|
||||
DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -532,7 +603,7 @@ pub enum CC {
|
|||
|
||||
/// <= unsigned
|
||||
BE = 6,
|
||||
/// > unsigend
|
||||
/// > unsigned
|
||||
NBE = 7,
|
||||
|
||||
/// negative
|
||||
|
@ -621,9 +692,9 @@ impl fmt::Debug for CC {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for CC {
|
||||
fn to_string(&self) -> String {
|
||||
format!("{:?}", self)
|
||||
impl fmt::Display for CC {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use log::debug;
|
||||
use regalloc::Reg;
|
||||
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use crate::binemit::Reloc;
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
|
@ -262,6 +264,34 @@ fn emit_std_enc_mem(
|
|||
panic!("ImmRegRegShift");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::RipRelative { ref target } => {
|
||||
// First, the REX byte, with REX.B = 0.
|
||||
rex.emit_two_op(sink, enc_g, 0);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// RIP-relative is mod=00, rm=101.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
|
||||
|
||||
match *target {
|
||||
BranchTarget::Label(label) => {
|
||||
let offset = sink.cur_offset();
|
||||
sink.use_label_at_offset(offset, label, LabelUse::JmpRel32);
|
||||
sink.put4(0);
|
||||
}
|
||||
BranchTarget::ResolvedOffset(offset) => {
|
||||
let offset =
|
||||
u32::try_from(offset).expect("rip-relative can't hold >= U32_MAX values");
|
||||
sink.put4(offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -340,6 +370,16 @@ fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits a one-way conditional jump if CC is set (true).
fn one_way_jmp(sink: &mut MachBuffer<Inst>, cc: CC, label: MachLabel) {
let cond_start = sink.cur_offset();
let cond_disp_off = cond_start + 2;
sink.use_label_at_offset(cond_disp_off, label, LabelUse::JmpRel32);
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(0x0);
}
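The helper emits the two-byte Jcc opcode (0x0F, 0x80 + cc) followed by a rel32 placeholder that the label fixup later patches; the displacement field starts two bytes into the instruction, hence cond_start + 2. A standalone sketch of the byte layout (illustrative, using a plain Vec rather than the MachBuffer API):

// Encode `jcc rel32` into a byte buffer: 0F 8x imm32 (little endian).
fn encode_jcc_rel32(buf: &mut Vec<u8>, cc_enc: u8, rel32: i32) {
    buf.push(0x0F);
    buf.push(0x80 + cc_enc);
    buf.extend_from_slice(&rel32.to_le_bytes());
}

fn main() {
    let mut buf = Vec::new();
    // CC encoding 4 is Z/E on x86; jump forward 16 bytes.
    encode_jcc_rel32(&mut buf, 4, 16);
    assert_eq!(buf, vec![0x0F, 0x84, 0x10, 0x00, 0x00, 0x00]);
}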
|
||||
|
||||
/// The top-level emit function.
|
||||
///
|
||||
/// Important! Do not add improved (shortened) encoding cases to existing
|
||||
|
@ -395,7 +435,7 @@ fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
|||
pub(crate) fn emit(
|
||||
inst: &Inst,
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
_flags: &settings::Flags,
|
||||
flags: &settings::Flags,
|
||||
state: &mut EmitState,
|
||||
) {
|
||||
match inst {
|
||||
|
@ -516,6 +556,226 @@ pub(crate) fn emit(
|
|||
}
|
||||
}
|
||||
|
||||
Inst::UnaryRmR { size, op, src, dst } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let (opcode, num_opcodes) = match op {
|
||||
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
||||
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
||||
};
|
||||
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Div {
|
||||
size,
|
||||
signed,
|
||||
divisor,
|
||||
loc,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
sink.add_trap(*loc, TrapCode::IntegerDivisionByZero);
|
||||
|
||||
let subopcode = if *signed { 7 } else { 6 };
|
||||
match divisor {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
}
|
||||
RegMem::Mem { addr: src } => emit_std_enc_mem(
|
||||
sink,
|
||||
prefix,
|
||||
0xF7,
|
||||
1,
|
||||
subopcode,
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::MulHi { size, signed, rhs } => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let subopcode = if *signed { 5 } else { 4 };
|
||||
match rhs {
|
||||
RegMem::Reg { reg } => {
|
||||
let src = int_reg_enc(*reg);
|
||||
emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
|
||||
}
|
||||
RegMem::Mem { addr: src } => emit_std_enc_mem(
|
||||
sink,
|
||||
prefix,
|
||||
0xF7,
|
||||
1,
|
||||
subopcode,
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
Inst::SignExtendRaxRdx { size } => {
|
||||
match size {
|
||||
2 => sink.put1(0x66),
|
||||
4 => {}
|
||||
8 => sink.put1(0x48),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
sink.put1(0x99);
|
||||
}
|
||||
|
||||
Inst::CheckedDivOrRemSeq {
|
||||
kind,
|
||||
size,
|
||||
divisor,
|
||||
loc,
|
||||
tmp,
|
||||
} => {
|
||||
// Generates the following code sequence:
//
// ;; check divide by zero:
// cmp 0 %divisor
// jnz $after_trap
// ud2
// $after_trap:
//
// ;; for signed modulo/div:
// cmp -1 %divisor
// jnz $do_op
// ;; for signed modulo, result is 0
// mov #0, %rdx
// j $done
// ;; for signed div, check for integer overflow against INT_MIN of the right size
// cmp INT_MIN, %rax
// jnz $do_op
// ud2
//
// $do_op:
// ;; if signed
// cdq ;; sign-extend from rax into rdx
// ;; else
// mov #0, %rdx
// idiv %divisor
//
// $done:
|
||||
debug_assert!(flags.avoid_div_traps());
|
||||
|
||||
// Check if the divisor is zero, first.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), *divisor);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let (do_op, done_label) = if kind.is_signed() {
|
||||
// Now check if the divisor is -1.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let do_op = sink.get_label();
|
||||
|
||||
// If not equal, jump to do-op.
|
||||
one_way_jmp(sink, CC::NZ, do_op);
|
||||
|
||||
// Here, divisor == -1.
|
||||
if !kind.is_div() {
|
||||
// x % -1 = 0; put the result into the destination, $rdx.
|
||||
let done_label = sink.get_label();
|
||||
|
||||
let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx()));
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::jmp_known(BranchTarget::Label(done_label));
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
(Some(do_op), Some(done_label))
|
||||
} else {
|
||||
// Check for integer overflow.
|
||||
if *size == 8 {
|
||||
let tmp = tmp.expect("temporary for i64 sdiv");
|
||||
|
||||
let inst = Inst::imm_r(true, 0x8000000000000000, tmp);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
} else {
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
// If not equal, jump over the trap.
|
||||
let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow, *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
(Some(do_op), None)
|
||||
}
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
if let Some(do_op) = do_op {
|
||||
sink.bind_label(do_op);
|
||||
}
|
||||
|
||||
// Fill in the high parts:
|
||||
if kind.is_signed() {
|
||||
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
|
||||
let inst = Inst::sign_extend_rax_to_rdx(*size);
|
||||
inst.emit(sink, flags, state);
|
||||
} else {
|
||||
// zero for unsigned opcodes.
|
||||
let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx()));
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Lowering takes care of moving the result back into the right register, see comment
|
||||
// there.
|
||||
|
||||
if let Some(done) = done_label {
|
||||
sink.bind_label(done);
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
|
@ -546,7 +806,12 @@ pub(crate) fn emit(
|
|||
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
|
||||
}
|
||||
|
||||
Inst::MovZX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovZX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
|
@ -588,27 +853,45 @@ pub(crate) fn emit(
|
|||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov64_M_R { src, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
RexFlags::set_w(),
|
||||
),
|
||||
Inst::Mov64_M_R { src, dst, srcloc } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
RexFlags::set_w(),
|
||||
)
|
||||
}
|
||||
|
||||
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
|
||||
sink,
|
||||
|
@ -620,7 +903,12 @@ pub(crate) fn emit(
|
|||
RexFlags::set_w(),
|
||||
),
|
||||
|
||||
Inst::MovSX_RM_R { ext_mode, src, dst } => {
|
||||
Inst::MovSX_RM_R {
|
||||
ext_mode,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
|
@ -654,21 +942,41 @@ pub(crate) fn emit(
|
|||
*src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
&src.finalize(state),
|
||||
rex_flags,
|
||||
),
|
||||
|
||||
RegMem::Mem { addr: src } => {
|
||||
let src = &src.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
LegacyPrefix::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::Mov_R_M { size, src, dst } => {
|
||||
Inst::Mov_R_M {
|
||||
size,
|
||||
src,
|
||||
dst,
|
||||
srcloc,
|
||||
} => {
|
||||
let dst = &dst.finalize(state);
|
||||
|
||||
if let Some(srcloc) = *srcloc {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
|
@ -736,9 +1044,11 @@ pub(crate) fn emit(
|
|||
} => {
|
||||
let enc_dst = int_reg_enc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
ShiftKind::RotateLeft => 0,
|
||||
ShiftKind::RotateRight => 1,
|
||||
ShiftKind::ShiftLeft => 4,
|
||||
ShiftKind::ShiftRightLogical => 5,
|
||||
ShiftKind::ShiftRightArithmetic => 7,
|
||||
};
|
||||
|
||||
let rex = if *is_64 {
|
||||
|
@ -849,6 +1159,30 @@ pub(crate) fn emit(
|
|||
);
|
||||
}
|
||||
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
src,
|
||||
dst: reg_g,
|
||||
} => {
|
||||
let (prefix, rex_flags) = match size {
|
||||
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||
_ => unreachable!("invalid size spec for cmove"),
|
||||
};
|
||||
let opcode = 0x0F40 + cc.get_enc() as u32;
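            // CMOVcc is 0F 40+cc, so e.g. CC::Z (encoding 4) yields 0x0F44, i.e. CMOVZ.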
            match src {
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
                }
                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex_flags);
                }
            }
        }

        Inst::Push64 { src } => {
            match src {
                RegMemImm::Reg { reg } => {

@@ -1027,10 +1361,112 @@ pub(crate) fn emit(
            }
        }

        Inst::JmpTableSeq {
            idx,
            tmp1,
            tmp2,
            ref targets,
            default_target,
            ..
        } => {
            // This sequence is *one* instruction in the vcode, and is expanded only here at
            // emission time, because we cannot allow the regalloc to insert spills/reloads in
            // the middle; we depend on hardcoded PC-rel addressing below.
            //
            // We don't have to worry about emitting islands, because the only label-use type has a
            // maximum range of 2 GB. If we later consider using shorter-range label references,
            // this will need to be revisited.

            // Save index in a tmp (the live range of ridx only goes to start of this
            // sequence; rtmp1 or rtmp2 may overwrite it).

            // We generate the following sequence:
            // ;; generated by lowering: cmp #jmp_table_size, %idx
            // jnb $default_target
            // movl %idx, %tmp2
            // lea start_of_jump_table_offset(%rip), %tmp1
            // movzlq [%tmp1, %tmp2], %tmp2
            // addq %tmp2, %tmp1
            // j *%tmp1
            // $start_of_jump_table:
            // -- jump table entries
            let default_label = match default_target {
                BranchTarget::Label(label) => label,
                _ => unreachable!(),
            };
            one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size

            // Copy the index (and make sure to clear the high 32-bits lane of tmp2).
            let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2, None);
            inst.emit(sink, flags, state);

            // Load base address of jump table.
            let start_of_jumptable = sink.get_label();
            let inst = Inst::lea(
                Amode::rip_relative(BranchTarget::Label(start_of_jumptable)),
                *tmp1,
            );
            inst.emit(sink, flags, state);

            // Load value out of jump table.
            let inst = Inst::movzx_rm_r(
                ExtMode::LQ,
                RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
                *tmp2,
                None,
            );
            inst.emit(sink, flags, state);

            // Add base of jump table to jump-table-sourced block offset.
            let inst = Inst::alu_rmi_r(
                true, /* is_64 */
                AluRmiROpcode::Add,
                RegMemImm::reg(tmp2.to_reg()),
                *tmp1,
            );
            inst.emit(sink, flags, state);

            // Branch to computed address.
            let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg()));
            inst.emit(sink, flags, state);

            // Emit jump table (table of 32-bit offsets).
            sink.bind_label(start_of_jumptable);
            let jt_off = sink.cur_offset();
            for &target in targets.iter() {
                let word_off = sink.cur_offset();
                // off_into_table is an addend here embedded in the label to be later patched at
                // the end of codegen. The offset is initially relative to this jump table entry;
                // with the extra addend, it'll be relative to the jump table's start, after
                // patching.
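                // Concretely: entry i lives at jt_off + 4*i and initially holds 4*i;
                // after PCRel32 patching it holds (target - jt_off), which is exactly
                // what the movzlq/addq sequence above adds to the table base.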
                let off_into_table = word_off - jt_off;
                sink.use_label_at_offset(word_off, target.as_label().unwrap(), LabelUse::PCRel32);
                sink.put4(off_into_table);
            }
        }

        Inst::TrapIf {
            cc,
            trap_code,
            srcloc,
        } => {
            let else_label = sink.get_label();

            // Jump over if the invert of CC is set (i.e. CC is not set).
            one_way_jmp(sink, cc.invert(), else_label);

            // Trap!
            let inst = Inst::trap(*srcloc, *trap_code);
            inst.emit(sink, flags, state);

            sink.bind_label(else_label);
        }

        Inst::XMM_Mov_RM_R {
            op,
            src: src_e,
            dst: reg_g,
            srcloc,
        } => {
            let rex = RexFlags::clear_w();
            let (prefix, opcode) = match op {

@@ -1045,9 +1481,12 @@ pub(crate) fn emit(
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
                }

                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    if let Some(srcloc) = *srcloc {
                        // Register the offset at which the actual load instruction starts.
                        sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
                    }
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
                }
            }

@@ -1075,14 +1514,19 @@ pub(crate) fn emit(
                RegMem::Reg { reg: reg_e } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
                }

                RegMem::Mem { addr } => {
                    let addr = &addr.finalize(state);
                    emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
                }
            }
        }
        Inst::XMM_Mov_R_M { op, src, dst } => {

        Inst::XMM_Mov_R_M {
            op,
            src,
            dst,
            srcloc,
        } => {
            let rex = RexFlags::clear_w();
            let (prefix, opcode) = match op {
                SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E),

@@ -1091,8 +1535,32 @@ pub(crate) fn emit(
            };

            let dst = &dst.finalize(state);
            if let Some(srcloc) = *srcloc {
                // Register the offset at which the actual load instruction starts.
                sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
            }
            emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex);
        }

        Inst::LoadExtName {
            dst,
            name,
            offset,
            srcloc,
        } => {
            // The full address can be encoded in the register, with a relocation.
            // Generates: movabsq $name, %dst
            let enc_dst = int_reg_enc(dst.to_reg());
            sink.put1(0x48 | ((enc_dst >> 3) & 1));
            sink.put1(0xB8 | (enc_dst & 7));
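            // This is the classic movabsq encoding: REX.W (0x48, plus REX.B for
            // r8..r15) then B8+rd and an 8-byte immediate; e.g. dst = %rcx (enc 1)
            // emits 48 B9 <imm64>.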
            sink.add_reloc(*srcloc, Reloc::Abs8, name, *offset);
            if flags.emit_all_ones_funcaddrs() {
                sink.put8(u64::max_value());
            } else {
                sink.put8(0);
            }
        }

        Inst::Hlt => {
            sink.put1(0xcc);
        }
File diff suppressed because it is too large

@@ -4,16 +4,17 @@
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

use alloc::boxed::Box;
use alloc::vec::Vec;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};

use regalloc::RegUsageCollector;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
use smallvec::SmallVec;

use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::types::*;
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
use crate::settings::Flags;

@@ -49,6 +50,49 @@ pub enum Inst {
        dst: Writable<Reg>,
    },

    /// Instructions on GPR that only read src and define dst (dst is not modified): bsr, etc.
    UnaryRmR {
        size: u8, // 2, 4 or 8
        op: UnaryRmROpcode,
        src: RegMem,
        dst: Writable<Reg>,
    },

    /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
    Div {
        size: u8, // 1, 2, 4 or 8
        signed: bool,
        divisor: RegMem,
        loc: SourceLoc,
    },

    /// The high bits (RDX) of an (un)signed multiply: RDX:RAX := RAX * rhs.
    MulHi { size: u8, signed: bool, rhs: RegMem },

    /// A synthetic sequence to implement the right inline checks for remainder and division,
    /// assuming the dividend is in %rax.
    /// Puts the result back into %rax if is_div, %rdx if !is_div, to mimic what the div
    /// instruction does.
    /// The generated code sequence is described in the emit function's match arm for this
    /// instruction.
    ///
    /// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem
    /// with the temporary and divisor registers. Make sure to zero %rdx right before this
    /// instruction, or you might run into regalloc failures where %rdx is live before its first
    /// def!
    CheckedDivOrRemSeq {
        kind: DivOrRemKind,
        size: u8,
        divisor: Reg,
        tmp: Option<Writable<Reg>>,
        loc: SourceLoc,
    },

    /// Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo)
    SignExtendRaxRdx {
        size: u8, // 1, 2, 4 or 8
    },

    /// Constant materialization: (imm32 imm64) reg.
    /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
    Imm_R {

@@ -71,12 +115,16 @@ pub enum Inst {
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// A plain 64-bit integer load, since MovZX_RM_R can't represent that.
    Mov64_M_R {
        src: SyntheticAmode,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Loads the memory address of addr into dst.

@@ -90,6 +138,8 @@ pub enum Inst {
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Integer stores: mov (b w l q) reg addr.

@@ -97,6 +147,8 @@ pub enum Inst {
        size: u8, // 1, 2, 4 or 8.
        src: Reg,
        dst: SyntheticAmode,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// Arithmetic shifts: (shl shr sar) (l q) imm reg.

@@ -118,6 +170,16 @@ pub enum Inst {
    /// Materializes the requested condition code in the destination reg.
    Setcc { cc: CC, dst: Writable<Reg> },

    /// Integer conditional move.
    /// Overwrites the destination register.
    Cmove {
        /// Possible values are 2, 4 or 8. Checked in the related factory.
        size: u8,
        cc: CC,
        src: RegMem,
        dst: Writable<Reg>,
    },

    // =====================================
    // Stack manipulation.
    /// pushq (reg addr imm)

@@ -143,6 +205,8 @@ pub enum Inst {
        op: SseOpcode,
        src: RegMem,
        dst: Writable<Reg>,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    /// mov reg addr (good for all memory stores from xmm registers)

@@ -150,6 +214,8 @@ pub enum Inst {
        op: SseOpcode,
        src: Reg,
        dst: SyntheticAmode,
        /// Source location, if the memory access can be out-of-bounds.
        srcloc: Option<SourceLoc>,
    },

    // =====================================

@@ -190,15 +256,42 @@ pub enum Inst {
        not_taken: BranchTarget,
    },

    /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale).
    /// The generated code sequence is described in the emit function's match arm for this
    /// instruction.
    JmpTableSeq {
        idx: Reg,
        tmp1: Writable<Reg>,
        tmp2: Writable<Reg>,
        default_target: BranchTarget,
        targets: Vec<BranchTarget>,
        targets_for_term: Vec<MachLabel>,
    },

    /// Indirect jump: jmpq (reg mem).
    JmpUnknown { target: RegMem },

    /// Traps if the condition code is set.
    TrapIf {
        cc: CC,
        trap_code: TrapCode,
        srcloc: SourceLoc,
    },

    /// A debug trap.
    Hlt,

    /// An instruction that will always trigger the illegal instruction exception.
    Ud2 { trap_info: (SourceLoc, TrapCode) },

    /// Loads an external symbol in a register, with a relocation: movabsq $name, dst
    LoadExtName {
        dst: Writable<Reg>,
        name: Box<ExternalName>,
        srcloc: SourceLoc,
        offset: i64,
    },

    // =====================================
    // Meta-instructions generating no code.
    /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This

@@ -206,15 +299,13 @@ pub enum Inst {
    VirtualSPOffsetAdj { offset: i64 },
}

// Handy constructors for Insts.

// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
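// E.g. 0x0000_0000_7fff_ffff does (its low 32 bits sign-extend back to the same
// value), while 0x0000_0000_8000_0000 does not (it would sign-extend to
// 0xffff_ffff_8000_0000).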
pub(crate) fn low32willSXto64(x: u64) -> bool {
pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        debug_assert!(len <= 16);

@@ -236,10 +327,45 @@ impl Inst {
        }
    }

    pub(crate) fn unary_rm_r(
        size: u8,
        op: UnaryRmROpcode,
        src: RegMem,
        dst: Writable<Reg>,
    ) -> Self {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        debug_assert!(size == 8 || size == 4 || size == 2);
        Self::UnaryRmR { size, op, src, dst }
    }

    pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        Inst::Div {
            size,
            signed,
            divisor,
            loc,
        }
    }

    pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        Inst::MulHi { size, signed, rhs }
    }

    pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2);
        Inst::SignExtendRaxRdx { size }
    }

    pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        if !dst_is_64 {
            debug_assert!(low32willSXto64(simm64));
            debug_assert!(
                low32_will_sign_extend_to_64(simm64),
                "{} won't sign-extend to 64 bits!",
                simm64
            );
        }
        Inst::Imm_R {
            dst_is_64,

@@ -254,9 +380,19 @@ impl Inst {
        Inst::Mov_R_R { is_64, src, dst }
    }

    pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
    pub(crate) fn xmm_mov_rm_r(
        op: SseOpcode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::V128);
        Inst::XMM_Mov_RM_R { op, src, dst }
        Inst::XMM_Mov_RM_R {
            op,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {

@@ -264,37 +400,77 @@ impl Inst {
        Inst::XMM_RM_R { op, src, dst }
    }

    pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
    pub(crate) fn xmm_mov_r_m(
        op: SseOpcode,
        src: Reg,
        dst: impl Into<SyntheticAmode>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(src.get_class() == RegClass::V128);
        Inst::XMM_Mov_R_M {
            op,
            src,
            dst: dst.into(),
            srcloc,
        }
    }

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
    pub(crate) fn movzx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovZX_RM_R { ext_mode, src, dst }
        Inst::MovZX_RM_R {
            ext_mode,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
    pub(crate) fn movsx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovSX_RM_R {
            ext_mode,
            src,
            dst,
            srcloc,
        }
    }

    pub(crate) fn mov64_m_r(
        src: impl Into<SyntheticAmode>,
        dst: Writable<Reg>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Mov64_M_R {
            src: src.into(),
            dst,
            srcloc,
        }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::MovSX_RM_R { ext_mode, src, dst }
    /// A convenience function to be able to use a RegMem as the source of a move.
    pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>, srcloc: Option<SourceLoc>) -> Inst {
        match src {
            RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
            RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc),
        }
    }

    pub(crate) fn mov_r_m(
        size: u8, // 1, 2, 4 or 8
        src: Reg,
        dst: impl Into<SyntheticAmode>,
        srcloc: Option<SourceLoc>,
    ) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
        debug_assert!(src.get_class() == RegClass::I64);

@@ -302,6 +478,7 @@ impl Inst {
            size,
            src,
            dst: dst.into(),
            srcloc,
        }
    }

@@ -345,11 +522,23 @@ impl Inst {
        Inst::Cmp_RMI_R { size, src, dst }
    }

    pub(crate) fn trap(srcloc: SourceLoc, trap_code: TrapCode) -> Inst {
        Inst::Ud2 {
            trap_info: (srcloc, trap_code),
        }
    }

    pub(crate) fn setcc(cc: CC, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Setcc { cc, dst }
    }

    pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
        debug_assert!(size == 8 || size == 4 || size == 2);
        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
        Inst::Cmove { size, cc, src, dst }
    }

    pub(crate) fn push64(src: RegMemImm) -> Inst {
        Inst::Push64 { src }
    }

@@ -413,6 +602,14 @@ impl Inst {
    pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
        Inst::JmpUnknown { target }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode, srcloc: SourceLoc) -> Inst {
        Inst::TrapIf {
            cc,
            trap_code,
            srcloc,
        }
    }
}

//=============================================================================

@@ -458,6 +655,7 @@ impl ShowWithRRU for Inst {

        match self {
            Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),

            Inst::Alu_RMI_R {
                is_64,
                op,

@@ -469,13 +667,68 @@ impl ShowWithRRU for Inst {
                src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
            ),
            Inst::XMM_Mov_RM_R { op, src, dst } => format!(

            Inst::UnaryRmR { src, dst, op, size } => format!(
                "{} {}, {}",
                ljustify2(op.to_string(), suffixBWLQ(*size)),
                src.show_rru_sized(mb_rru, *size),
                show_ireg_sized(dst.to_reg(), mb_rru, *size),
            ),

            Inst::Div {
                size,
                signed,
                divisor,
                ..
            } => format!(
                "{} {}",
                ljustify(if *signed {
                    "idiv".to_string()
                } else {
                    "div".into()
                }),
                divisor.show_rru_sized(mb_rru, *size)
            ),
            Inst::MulHi {
                size, signed, rhs, ..
            } => format!(
                "{} {}",
                ljustify(if *signed {
                    "imul".to_string()
                } else {
                    "mul".to_string()
                }),
                rhs.show_rru_sized(mb_rru, *size)
            ),
            Inst::CheckedDivOrRemSeq {
                kind,
                size,
                divisor,
                ..
            } => format!(
                "{} $rax:$rdx, {}",
                match kind {
                    DivOrRemKind::SignedDiv => "sdiv",
                    DivOrRemKind::UnsignedDiv => "udiv",
                    DivOrRemKind::SignedRem => "srem",
                    DivOrRemKind::UnsignedRem => "urem",
                },
                show_ireg_sized(*divisor, mb_rru, *size),
            ),
            Inst::SignExtendRaxRdx { size } => match size {
                2 => "cwd",
                4 => "cdq",
                8 => "cqo",
                _ => unreachable!(),
            }
            .into(),
            Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify(op.to_string()),
                src.show_rru_sized(mb_rru, op.src_size()),
                show_ireg_sized(dst.to_reg(), mb_rru, 8),
            ),
            Inst::XMM_Mov_R_M { op, src, dst } => format!(
            Inst::XMM_Mov_R_M { op, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify(op.to_string()),
                show_ireg_sized(*src, mb_rru, 8),

@@ -514,7 +767,9 @@ impl ShowWithRRU for Inst {
                show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
            ),
            Inst::MovZX_RM_R { ext_mode, src, dst } => {
            Inst::MovZX_RM_R {
                ext_mode, src, dst, ..
            } => {
                if *ext_mode == ExtMode::LQ {
                    format!(
                        "{} {}, {}",

@@ -531,7 +786,7 @@ impl ShowWithRRU for Inst {
                    )
                }
            }
            Inst::Mov64_M_R { src, dst } => format!(
            Inst::Mov64_M_R { src, dst, .. } => format!(
                "{} {}, {}",
                ljustify("movq".to_string()),
                src.show_rru(mb_rru),

@@ -543,13 +798,15 @@ impl ShowWithRRU for Inst {
                addr.show_rru(mb_rru),
                dst.show_rru(mb_rru)
            ),
            Inst::MovSX_RM_R { ext_mode, src, dst } => format!(
            Inst::MovSX_RM_R {
                ext_mode, src, dst, ..
            } => format!(
                "{} {}, {}",
                ljustify2("movs".to_string(), ext_mode.to_string()),
                src.show_rru_sized(mb_rru, ext_mode.src_size()),
                show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
            ),
            Inst::Mov_R_M { size, src, dst } => format!(
            Inst::Mov_R_M { size, src, dst, .. } => format!(
                "{} {}, {}",
                ljustify2("mov".to_string(), suffixBWLQ(*size)),
                show_ireg_sized(*src, mb_rru, *size),

@@ -585,6 +842,12 @@ impl ShowWithRRU for Inst {
                ljustify2("set".to_string(), cc.to_string()),
                show_ireg_sized(dst.to_reg(), mb_rru, 1)
            ),
            Inst::Cmove { size, cc, src, dst } => format!(
                "{} {}, {}",
                ljustify(format!("cmov{}{}", cc.to_string(), suffixBWLQ(*size))),
                src.show_rru_sized(mb_rru, *size),
                show_ireg_sized(dst.to_reg(), mb_rru, *size)
            ),
            Inst::Push64 { src } => {
                format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
            }

@@ -612,12 +875,27 @@ impl ShowWithRRU for Inst {
                taken.show_rru(mb_rru),
                not_taken.show_rru(mb_rru)
            ),
            Inst::JmpTableSeq { idx, .. } => {
                format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
            }
            //
            Inst::JmpUnknown { target } => format!(
                "{} *{}",
                ljustify("jmp".to_string()),
                target.show_rru(mb_rru)
            ),
            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
            }
            Inst::LoadExtName {
                dst, name, offset, ..
            } => format!(
                "{} {}+{}, {}",
                ljustify("movaps".into()),
                name,
                offset,
                show_ireg_sized(dst.to_reg(), mb_rru, 8),
            ),
            Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
            Inst::Hlt => "hlt".into(),
            Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1),

@@ -639,16 +917,36 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
    // sets.
    match inst {
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            src,
            dst,
        } => {
        Inst::Alu_RMI_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
        Inst::XMM_Mov_RM_R { src, dst, .. } => {
        Inst::Div { divisor, .. } => {
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_mod(Writable::from_reg(regs::rdx()));
            divisor.get_regs_as_uses(collector);
        }
        Inst::MulHi { rhs, .. } => {
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_def(Writable::from_reg(regs::rdx()));
            rhs.get_regs_as_uses(collector);
        }
        Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
            // Mark both fixed registers as mods, to avoid an early clobber problem in codegen
            // (i.e. the temporary is allocated one of the fixed registers). This requires writing
            // the rdx register *before* the instruction, which is not too bad.
            collector.add_mod(Writable::from_reg(regs::rax()));
            collector.add_mod(Writable::from_reg(regs::rdx()));
            collector.add_use(*divisor);
            if let Some(tmp) = tmp {
                collector.add_def(*tmp);
            }
        }
        Inst::SignExtendRaxRdx { .. } => {
            collector.add_use(regs::rax());
            collector.add_mod(Writable::from_reg(regs::rdx()));
        }
        Inst::UnaryRmR { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }

@@ -671,7 +969,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
        Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
        Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst)
        }

@@ -683,24 +981,23 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_use(*src);
            dst.get_regs_as_uses(collector);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits,
            dst,
        } => {
        Inst::Shift_R { num_bits, dst, .. } => {
            if num_bits.is_none() {
                collector.add_use(regs::rcx());
            }
            collector.add_mod(*dst);
        }
        Inst::Cmp_RMI_R { size: _, src, dst } => {
        Inst::Cmp_RMI_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_use(*dst); // yes, really `add_use`
        }
        Inst::Setcc { dst, .. } => {
            collector.add_def(*dst);
        }
        Inst::Cmove { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
        Inst::Push64 { src } => {
            src.get_regs_as_uses(collector);
            collector.add_mod(Writable::from_reg(regs::rsp()));

@@ -727,12 +1024,31 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            dest.get_regs_as_uses(collector);
        }

        Inst::JmpTableSeq {
            ref idx,
            ref tmp1,
            ref tmp2,
            ..
        } => {
            collector.add_use(*idx);
            collector.add_def(*tmp1);
            collector.add_def(*tmp2);
        }

        Inst::JmpUnknown { target } => {
            target.get_regs_as_uses(collector);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.add_def(*dst);
        }

        Inst::Ret
        | Inst::EpiloguePlaceholder
        | Inst::JmpKnown { .. }
        | Inst::JmpCond { .. }
        | Inst::Nop { .. }
        | Inst::JmpUnknown { .. }
        | Inst::TrapIf { .. }
        | Inst::VirtualSPOffsetAdj { .. }
        | Inst::Hlt
        | Inst::Ud2 { .. } => {

@@ -768,19 +1084,18 @@ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
impl Amode {
    fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
        match self {
            Amode::ImmReg {
                simm32: _,
                ref mut base,
            } => map_use(map, base),
            Amode::ImmReg { ref mut base, .. } => map_use(map, base),
            Amode::ImmRegRegShift {
                simm32: _,
                ref mut base,
                ref mut index,
                shift: _,
                ..
            } => {
                map_use(map, base);
                map_use(map, index);
            }
            Amode::RipRelative { .. } => {
                // RIP isn't involved in regalloc.
            }
        }
    }
}

@@ -790,7 +1105,7 @@ impl RegMemImm {
        match self {
            RegMemImm::Reg { ref mut reg } => map_use(map, reg),
            RegMemImm::Mem { ref mut addr } => addr.map_uses(map),
            RegMemImm::Imm { simm32: _ } => {}
            RegMemImm::Imm { .. } => {}
        }
    }
}

@@ -799,7 +1114,7 @@ impl RegMem {
    fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
        match self {
            RegMem::Reg { ref mut reg } => map_use(map, reg),
            RegMem::Mem { ref mut addr } => addr.map_uses(map),
            RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
        }
    }
}

@@ -809,18 +1124,31 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
    match inst {
        // ** Nop
        Inst::Alu_RMI_R {
            is_64: _,
            op: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_mod(mapper, dst);
        }
        Inst::Div { divisor, .. } => divisor.map_uses(mapper),
        Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
        Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
            map_use(mapper, divisor);
            if let Some(tmp) = tmp {
                map_def(mapper, tmp)
            }
        }
        Inst::SignExtendRaxRdx { .. } => {}
        Inst::XMM_Mov_RM_R {
            ref mut src,
            ref mut dst,
            ..
        }
        | Inst::UnaryRmR {
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_def(mapper, dst);

@@ -841,15 +1169,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_use(mapper, src);
            dst.map_uses(mapper);
        }
        Inst::Imm_R {
            dst_is_64: _,
            simm64: _,
            ref mut dst,
        } => map_def(mapper, dst),
        Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
        Inst::Mov_R_R {
            is_64: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            map_use(mapper, src);
            map_def(mapper, dst);

@@ -862,7 +1186,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            src.map_uses(mapper);
            map_def(mapper, dst);
        }
        Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
        Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
            src.map_uses(mapper);
            map_def(mapper, dst);
        }

@@ -882,23 +1206,26 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_use(mapper, src);
            dst.map_uses(mapper);
        }
        Inst::Shift_R {
            is_64: _,
            kind: _,
            num_bits: _,
            ref mut dst,
        } => {
        Inst::Shift_R { ref mut dst, .. } => {
            map_mod(mapper, dst);
        }
        Inst::Cmp_RMI_R {
            size: _,
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_use(mapper, dst);
        }
        Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
        Inst::Cmove {
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_mod(mapper, dst)
        }
        Inst::Push64 { ref mut src } => src.map_uses(mapper),
        Inst::Pop64 { ref mut dst } => {
            map_def(mapper, dst);

@@ -932,12 +1259,27 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            dest.map_uses(mapper);
        }

        Inst::JmpTableSeq {
            ref mut idx,
            ref mut tmp1,
            ref mut tmp2,
            ..
        } => {
            map_use(mapper, idx);
            map_def(mapper, tmp1);
            map_def(mapper, tmp2);
        }

        Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),

        Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),

        Inst::Ret
        | Inst::EpiloguePlaceholder
        | Inst::JmpKnown { .. }
        | Inst::JmpCond { .. }
        | Inst::Nop { .. }
        | Inst::JmpUnknown { .. }
        | Inst::TrapIf { .. }
        | Inst::VirtualSPOffsetAdj { .. }
        | Inst::Ud2 { .. }
        | Inst::Hlt => {

@@ -964,8 +1306,10 @@ impl MachInst for Inst {
        // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
        // %reg.
        match self {
            Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
            Self::XMM_Mov_RM_R { op, src, dst }
            Self::Mov_R_R {
                is_64, src, dst, ..
            } if *is_64 => Some((*dst, *src)),
            Self::XMM_Mov_RM_R { op, src, dst, .. }
                if *op == SseOpcode::Movss
                    || *op == SseOpcode::Movsd
                    || *op == SseOpcode::Movaps =>

@@ -994,10 +1338,12 @@ impl MachInst for Inst {
            &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
            &Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()),
            &Self::JmpCond {
                cc: _,
                taken,
                not_taken,
                taken, not_taken, ..
            } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
            &Self::JmpTableSeq {
                ref targets_for_term,
                ..
            } => MachTerminator::Indirect(&targets_for_term[..]),
            // All other cases are boring.
            _ => MachTerminator::None,
        }

@@ -1011,8 +1357,8 @@ impl MachInst for Inst {
        match rc_dst {
            RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
            RegClass::V128 => match ty {
                F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg),
                F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg),
                F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
                F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
                _ => panic!("unexpected V128 type in gen_move"),
            },
            _ => panic!("gen_move(x64): unhandled regclass"),

@@ -1035,6 +1381,7 @@ impl MachInst for Inst {
        match ty {
            I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
            F32 | F64 | I128 | B128 => Ok(RegClass::V128),
            IFLAGS | FFLAGS => Ok(RegClass::I64),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {}",
                ty

@@ -1046,10 +1393,11 @@ impl MachInst for Inst {
        Inst::jmp_known(BranchTarget::Label(label))
    }

    fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
    fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]> {
        let mut ret = SmallVec::new();
        let is64 = value > 0xffff_ffff;
        ret.push(Inst::imm_r(is64, value, to_reg));
        debug_assert!(ty.is_int(), "float constants NYI");
        let is_64 = ty == I64 && value > 0x7fffffff;
        ret.push(Inst::imm_r(is_64, value, to_reg));
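        // imm_r (above) asserts that a 32-bit immediate sign-extends to the
        // intended 64-bit value, so I64 constants above 0x7fffffff take the
        // 64-bit (movabsq) form here.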
        ret
    }

@@ -1061,6 +1409,10 @@ impl MachInst for Inst {
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::I64
    }

    type LabelUse = LabelUse;
}

@@ -1076,6 +1428,18 @@ impl MachInstEmit for Inst {
    fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut Self::State) {
        emit::emit(self, sink, flags, state);
    }

    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
        self.show_rru(mb_rru)
    }
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(_: &dyn ABIBody<I = Inst>) -> Self {
        EmitState {
            virtual_sp_offset: 0,
        }
    }
}

/// A label-use (internal relocation) in generated code.

@@ -1085,6 +1449,10 @@ pub enum LabelUse {
    /// location. Used for control flow instructions which consider an offset from the start of the
    /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
    JmpRel32,

    /// A 32-bit offset from location of relocation itself, added to the existing value at that
    /// location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {

@@ -1092,19 +1460,19 @@ impl MachInstLabelUse for LabelUse {

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0x7fff_ffff,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0x8000_0000,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 4,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

@@ -1119,24 +1487,29 @@ impl MachInstLabelUse for LabelUse {
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
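            // For PCRel32, pc_rel is (label_offset - use_offset), and the addend
            // was stored in the buffer by the emitter; e.g. a jump-table word whose
            // addend is (word_off - jt_off) patches to (target - jt_off).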
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 => false,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 => 0,
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 => {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JumpRel32 label-use.");
            }
        }

@@ -33,46 +33,55 @@ fn gpr(enc: u8, index: u8) -> Reg {
}

pub(crate) fn r12() -> Reg {
    gpr(ENC_R12, 0)
    gpr(ENC_R12, 16)
}
pub(crate) fn r13() -> Reg {
    gpr(ENC_R13, 1)
    gpr(ENC_R13, 17)
}
pub(crate) fn r14() -> Reg {
    gpr(ENC_R14, 2)
}
pub(crate) fn r15() -> Reg {
    gpr(ENC_R15, 3)
    gpr(ENC_R14, 18)
}
pub(crate) fn rbx() -> Reg {
    gpr(ENC_RBX, 4)
    gpr(ENC_RBX, 19)
}
pub(crate) fn rsi() -> Reg {
    gpr(6, 5)
    gpr(6, 20)
}
pub(crate) fn rdi() -> Reg {
    gpr(7, 6)
    gpr(7, 21)
}
pub(crate) fn rax() -> Reg {
    gpr(0, 7)
    gpr(0, 22)
}
pub(crate) fn rcx() -> Reg {
    gpr(1, 8)
    gpr(1, 23)
}
pub(crate) fn rdx() -> Reg {
    gpr(2, 9)
    gpr(2, 24)
}
pub(crate) fn r8() -> Reg {
    gpr(8, 10)
    gpr(8, 25)
}
pub(crate) fn r9() -> Reg {
    gpr(9, 11)
    gpr(9, 26)
}
pub(crate) fn r10() -> Reg {
    gpr(10, 12)
    gpr(10, 27)
}
pub(crate) fn r11() -> Reg {
    gpr(11, 13)
    gpr(11, 28)
}

pub(crate) fn r15() -> Reg {
    // r15 is put aside since this is the pinned register.
    gpr(ENC_R15, 29)
}

/// The pinned register on this architecture.
/// It must be the same as Spidermonkey's HeapReg, as found in this file.
/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
pub(crate) fn pinned_reg() -> Reg {
    r15()
}

fn fpr(enc: u8, index: u8) -> Reg {

@@ -80,52 +89,52 @@ fn fpr(enc: u8, index: u8) -> Reg {
}

pub(crate) fn xmm0() -> Reg {
    fpr(0, 14)
    fpr(0, 0)
}
pub(crate) fn xmm1() -> Reg {
    fpr(1, 15)
    fpr(1, 1)
}
pub(crate) fn xmm2() -> Reg {
    fpr(2, 16)
    fpr(2, 2)
}
pub(crate) fn xmm3() -> Reg {
    fpr(3, 17)
    fpr(3, 3)
}
pub(crate) fn xmm4() -> Reg {
    fpr(4, 18)
    fpr(4, 4)
}
pub(crate) fn xmm5() -> Reg {
    fpr(5, 19)
    fpr(5, 5)
}
pub(crate) fn xmm6() -> Reg {
    fpr(6, 20)
    fpr(6, 6)
}
pub(crate) fn xmm7() -> Reg {
    fpr(7, 21)
    fpr(7, 7)
}
pub(crate) fn xmm8() -> Reg {
    fpr(8, 22)
    fpr(8, 8)
}
pub(crate) fn xmm9() -> Reg {
    fpr(9, 23)
    fpr(9, 9)
}
pub(crate) fn xmm10() -> Reg {
    fpr(10, 24)
    fpr(10, 10)
}
pub(crate) fn xmm11() -> Reg {
    fpr(11, 25)
    fpr(11, 11)
}
pub(crate) fn xmm12() -> Reg {
    fpr(12, 26)
    fpr(12, 12)
}
pub(crate) fn xmm13() -> Reg {
    fpr(13, 27)
    fpr(13, 13)
}
pub(crate) fn xmm14() -> Reg {
    fpr(14, 28)
    fpr(14, 14)
}
pub(crate) fn xmm15() -> Reg {
    fpr(15, 29)
    fpr(15, 15)
}

pub(crate) fn rsp() -> Reg {

@@ -139,39 +148,14 @@ pub(crate) fn rbp() -> Reg {
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = Vec::<(RealReg, String)>::new();
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Integer regs.
    let mut base = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));
    regs.push((r15().to_real_reg(), "%r15".into()));
    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(r12().get_index()),
    });
    let use_pinned_reg = flags.enable_pinned_reg();

    // XMM registers
    base = regs.len();
    let first_fpr = regs.len();
    regs.push((xmm0().to_real_reg(), "%xmm0".into()));
    regs.push((xmm1().to_real_reg(), "%xmm1".into()));
    regs.push((xmm2().to_real_reg(), "%xmm2".into()));

@@ -188,17 +172,61 @@ pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
    regs.push((xmm13().to_real_reg(), "%xmm13".into()));
    regs.push((xmm14().to_real_reg(), "%xmm14".into()));
    regs.push((xmm15().to_real_reg(), "%xmm15".into()));
    let last_fpr = regs.len() - 1;

    // Integer regs.
    let first_gpr = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));

    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    // Other regs, not available to the allocator.
    debug_assert_eq!(r15(), pinned_reg());
    let allocable = if use_pinned_reg {
        // The pinned register is not allocatable in this case, so record the length before adding
        // it.
        let len = regs.len();
        regs.push((r15().to_real_reg(), "%r15/pinned".into()));
        len
    } else {
        regs.push((r15().to_real_reg(), "%r15".into()));
        regs.len()
    };
    let last_gpr = allocable - 1;

    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: first_gpr,
        last: last_gpr,
        suggested_scratch: Some(r12().get_index()),
    });
    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        first: first_fpr,
        last: last_fpr,
        suggested_scratch: Some(xmm15().get_index()),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));
    // Sanity-check: the index passed to the Reg ctor must match the order in the register list.
    for (i, reg) in regs.iter().enumerate() {
        assert_eq!(i, reg.0.get_index());
    }
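    // With the numbering above, xmm0..xmm15 get indices 0..15 and the GPRs start
    // at 16 (r12 = 16, ..., r15 = 29), so iteration order and ctor indices line up.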

    RealRegUniverse {
        regs,
File diff suppressed because it is too large

@@ -11,28 +11,33 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::pretty_print::ShowWithRRU;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self, Flags};
use crate::settings::{self as shared_settings, Flags};

use crate::isa::x64::inst::regs::create_reg_universe_systemv;
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};

use super::TargetIsa;

mod abi;
mod inst;
mod lower;
mod settings;

/// An X64 backend.
pub(crate) struct X64Backend {
    triple: Triple,
    flags: Flags,
    _x64_flags: x64_settings::Flags,
    reg_universe: RealRegUniverse,
}

impl X64Backend {
    /// Create a new X64 backend with the given (shared) flags.
    fn new_with_flags(triple: Triple, flags: Flags) -> Self {
    fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
        let reg_universe = create_reg_universe_systemv(&flags);
        Self {
            triple,
            flags,
            _x64_flags: x64_flags,
            reg_universe,
        }
    }

@@ -103,10 +108,17 @@ impl MachBackend for X64Backend {
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
    IsaBuilder {
        triple,
        setup: settings::builder(),
        constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
            let backend = X64Backend::new_with_flags(triple, flags);
            Box::new(TargetIsaAdapter::new(backend))
        },
        setup: x64_settings::builder(),
        constructor: isa_constructor,
    }
}

fn isa_constructor(
    triple: Triple,
    shared_flags: Flags,
    builder: shared_settings::Builder,
) -> Box<dyn TargetIsa> {
    let isa_flags = x64_settings::Flags::new(&shared_flags, builder);
    let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
    Box::new(TargetIsaAdapter::new(backend))
}

@@ -0,0 +1,9 @@
//! x86 Settings.

use crate::settings::{self, detail, Builder};
use core::fmt;

// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));

@@ -1071,8 +1071,7 @@ pub fn create_unwind_info(
            .map(|u| UnwindInfo::SystemV(u))
        }
        CallConv::WindowsFastcall => {
            super::unwind::winx64::create_unwind_info(func, isa, Some(RU::rbp.into()))?
                .map(|u| UnwindInfo::WindowsX64(u))
            super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
        }
        _ => None,
    })

@@ -1313,6 +1313,79 @@ fn expand_fcvt_to_uint_sat(
    cfg.recompute_block(pos.func, done);
}

// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4.
static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
    0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f,
];
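// Each little-endian lane is 0x4f000000, i.e. 2147483648.0f32 (2^31): the value
// that i32::MAX (2147483647) rounds to when converted to f32.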
|
||||
|
||||
/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes
|
||||
/// using a long sequence of NaN quieting and truncation. This logic is separate from
|
||||
/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are
|
||||
/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into
|
||||
/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
|
||||
fn expand_fcvt_to_uint_sat_vector(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Unary {
|
||||
opcode: ir::Opcode::FcvtToUintSat,
|
||||
arg,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
|
||||
if controlling_type == I32X4 {
|
||||
debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
|
||||
// We must both quiet any NaNs--setting that lane to 0--and saturate any
|
||||
// lanes that might overflow during conversion to the highest/lowest integer
|
||||
// allowed in that lane.
|
||||
let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into());
|
||||
let max_signed_constant = pos
|
||||
.func
|
||||
.dfg
|
||||
.constants
|
||||
.insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
|
||||
let zeroes = pos.ins().vconst(F32X4, zeroes_constant);
|
||||
let max_signed = pos.ins().vconst(F32X4, max_signed_constant);
|
||||
// Clamp the input to 0 for negative floating point numbers. TODO we need to
|
||||
// convert NaNs to 0 but this doesn't do that?
|
||||
let ge_zero = pos.ins().x86_fmax(arg, zeroes);
|
||||
// Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert.
|
||||
// For floating point numbers above this, CVTTPS2DQ returns the undefined value
|
||||
// 0x80000000.
|
||||
let minus_max_signed = pos.ins().fsub(ge_zero, max_signed);
|
||||
let le_max_signed =
|
||||
pos.ins()
|
||||
.fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed);
|
||||
// Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0.
|
||||
// These lanes have the MSB set to 1 after the XOR. We are trying to calculate a
|
||||
// valid, in-range addend.
|
||||
let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed);
|
||||
let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed);
|
||||
let difference = pos
|
||||
.ins()
|
||||
.bxor(minus_max_signed_as_int, le_max_signed_as_int);
|
||||
// Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified
|
||||
// previously (MSB set to 1).
|
||||
let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes);
|
||||
let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int);
|
||||
// Convert the original clamped number to an integer and add back in the addend
|
||||
// (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these).
|
||||
let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero);
|
||||
pos.func.dfg.replace(inst).iadd(converted, addend);
|
||||
} else {
|
||||
unreachable!(
|
||||
"{} should not be legalized in expand_fcvt_to_uint_sat_vector",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
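
For intuition, the per-lane operation this legalization targets is the saturating float-to-unsigned conversion (Wasm's i32.trunc_sat_f32_u). A scalar model of the intended semantics in plain Rust — illustrative only; the lowering above operates on whole vectors with x86-specific instructions, and its NaN handling is still under the TODO noted in the comments:

// Scalar model of the intended per-lane semantics of fcvt_to_uint_sat on
// f32 -> u32: NaN lanes become 0, negative lanes clamp to 0, and lanes at
// or above 2^32 saturate to u32::MAX. Note that the bytes 00 00 00 4f
// (little-endian) in MAX_SIGNED_I32X4S_AS_F32X4S above are the f32 value
// 2147483648.0, i.e. 2^31.
fn fcvt_to_uint_sat32(x: f32) -> u32 {
    if x.is_nan() || x <= 0.0 {
        0
    } else if x >= 4294967296.0 {
        // at or above 2^32
        u32::MAX
    } else {
        x as u32 // in-range truncation toward zero
    }
}

fn main() {
    assert_eq!(fcvt_to_uint_sat32(f32::NAN), 0);
    assert_eq!(fcvt_to_uint_sat32(-1.5), 0);
    assert_eq!(fcvt_to_uint_sat32(3.9), 3);
    assert_eq!(fcvt_to_uint_sat32(1e20), u32::MAX);
    // 2^31 is exactly representable and in range:
    assert_eq!(fcvt_to_uint_sat32(2147483648.0), 2_147_483_648);
}
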
/// Convert shuffle instructions.
fn convert_shuffle(
    inst: ir::Inst,

@@ -57,20 +57,12 @@ fn isa_constructor(
    let isa_flags = settings::Flags::new(&shared_flags, builder);

    if isa_flags.use_new_backend() {
        #[cfg(not(feature = "x64"))]
        panic!("new backend x86 support not included by cargo features!");

        #[cfg(feature = "x64")]
        super::x64::isa_builder(triple).finish(shared_flags)
    } else {
        Box::new(Isa {
            triple,
            isa_flags,
            shared_flags,
            cpumode: level1,
        })
    }
    Box::new(Isa {
        triple,
        isa_flags,
        shared_flags,
        cpumode: level1,
    })
}

impl TargetIsa for Isa {

@@ -13,7 +13,6 @@ use log::warn;
pub(crate) fn create_unwind_info(
    func: &Function,
    isa: &dyn TargetIsa,
    frame_register: Option<RegUnit>,
) -> CodegenResult<Option<UnwindInfo>> {
    // Only Windows fastcall is supported for unwind information
    if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {

@@ -28,7 +27,6 @@ pub(crate) fn create_unwind_info(
    let mut prologue_size = 0;
    let mut unwind_codes = Vec::new();
    let mut found_end = false;
    let mut xmm_save_count: u8 = 0;

    for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
        // x64 ABI prologues cannot exceed 255 bytes in length

@@ -64,16 +62,6 @@ pub(crate) fn create_unwind_info(
                    _ => {}
                }
            }
            InstructionData::CopySpecial { src, dst, .. } => {
                if let Some(frame_register) = frame_register {
                    if src == (RU::rsp as RegUnit) && dst == frame_register {
                        unwind_codes.push(UnwindCode::SetFramePointer {
                            offset: unwind_offset,
                            sp_offset: 0,
                        });
                    }
                }
            }
            InstructionData::UnaryImm { opcode, imm } => {
                match opcode {
                    Opcode::Iconst => {

@@ -112,7 +100,6 @@ pub(crate) fn create_unwind_info(
                {
                    // If this is a save of an FPR, record an unwind operation
                    // Note: the stack_offset here is relative to an adjusted SP
                    // This will be fixed up later to be based on the frame pointer offset
                    if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
                        let offset: i32 = offset.into();
                        unwind_codes.push(UnwindCode::SaveXmm {

@@ -120,8 +107,6 @@ pub(crate) fn create_unwind_info(
                            reg: src as u8,
                            stack_offset: offset as u32,
                        });

                        xmm_save_count += 1;
                    }
                }
            }

@@ -136,45 +121,11 @@ pub(crate) fn create_unwind_info(

    assert!(found_end);

    // When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
    // register minus some offset, forming a "base address". This attempts to calculate the frame register offset
    // while updating the XMM save offsets to be relative from this "base address" rather than RSP.
    let mut frame_register_offset = 0;
    if frame_register.is_some() && xmm_save_count > 0 {
        // Determine the number of 16-byte slots used for all CSRs (including GPRs)
        // The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
        // Assumption: each FPR is stored at a lower address than the previous one
        let mut last_stack_offset = None;
        let mut fpr_save_count: u8 = 0;
        let mut gpr_push_count: u8 = 0;
        for code in unwind_codes.iter_mut() {
            match code {
                UnwindCode::SaveXmm { stack_offset, .. } => {
                    if let Some(last) = last_stack_offset {
                        assert!(last > *stack_offset);
                    }
                    last_stack_offset = Some(*stack_offset);
                    fpr_save_count += 1;
                    *stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
                }
                UnwindCode::PushRegister { .. } => {
                    gpr_push_count += 1;
                }
                _ => {}
            }
        }
        assert_eq!(fpr_save_count, xmm_save_count);

        // Account for alignment space when there's an odd number of GPR pushes
        // Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
        frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
    }

    Ok(Some(UnwindInfo {
        flags: 0, // this assumes cranelift functions have no SEH handlers
        prologue_size: prologue_size as u8,
        frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
        frame_register_offset,
        frame_register: None,
        frame_register_offset: 0,
        unwind_codes,
    }))
}

@@ -201,7 +152,7 @@ mod tests {
        context.compile(&*isa).expect("expected compilation");

        assert_eq!(
            create_unwind_info(&context.func, &*isa, None).expect("can create unwind info"),
            create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
            None
        );
    }

@@ -219,7 +170,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -228,17 +179,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 9,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 9,
                        size: 64

@@ -247,9 +194,9 @@ mod tests {
            }
        );

        assert_eq!(unwind.emit_size(), 12);
        assert_eq!(unwind.emit_size(), 8);

        let mut buf = [0u8; 12];
        let mut buf = [0u8; 8];
        unwind.emit(&mut buf);

        assert_eq!(

@@ -257,16 +204,12 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x09, // Prologue size
                0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x02, // Unwind code count (1 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x09, // Prolog offset
                0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

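The hex bytes checked in these tests follow the Windows x64 UNWIND_CODE layout: each code is a prolog-offset byte followed by an operation byte whose low nibble selects the operation and whose high nibble carries the operation info. A small decoding sketch (the layout is from the Windows x64 unwind documentation; the function name is illustrative):

// Decode one Windows x64 unwind-code operation byte, e.g. the 0x72 above:
// low nibble = operation (2 = UWOP_ALLOC_SMALL), high nibble = op info.
// For UWOP_ALLOC_SMALL the allocation size is info * 8 + 8 bytes.
fn decode_op(byte: u8) -> (u8, u8) {
    (byte & 0x0f, byte >> 4)
}

fn main() {
    let (op, info) = decode_op(0x72);
    assert_eq!(op, 2); // small stack alloc
    assert_eq!(info as u32 * 8 + 8, 64); // the 64-byte alloc in the test
    let (op, info) = decode_op(0x50);
    assert_eq!(op, 0); // push nonvolatile register
    assert_eq!(info, 5); // register number 5 = RBP
}
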
@@ -284,7 +227,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -293,17 +236,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 27,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 27,
                        size: 10000

@@ -322,16 +261,16 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x03, // Unwind code count (2 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x1B, // Prolog offset
                0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
                0xE2, // Low size byte
                0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding
                0x00, // Padding
            ]
        );
    }

@@ -349,7 +288,7 @@ mod tests {

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into()))
        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

@@ -358,17 +297,13 @@ mod tests {
            UnwindInfo {
                flags: 0,
                prologue_size: 27,
                frame_register: Some(GPR.index_of(RU::rbp.into()) as u8),
                frame_register: None,
                frame_register_offset: 0,
                unwind_codes: vec![
                    UnwindCode::PushRegister {
                        offset: 2,
                        reg: GPR.index_of(RU::rbp.into()) as u8
                    },
                    UnwindCode::SetFramePointer {
                        offset: 5,
                        sp_offset: 0
                    },
                    UnwindCode::StackAlloc {
                        offset: 27,
                        size: 1000000

@@ -377,9 +312,9 @@ mod tests {
            }
        );

        assert_eq!(unwind.emit_size(), 16);
        assert_eq!(unwind.emit_size(), 12);

        let mut buf = [0u8; 16];
        let mut buf = [0u8; 12];
        unwind.emit(&mut buf);

        assert_eq!(

@@ -387,20 +322,16 @@ mod tests {
            [
                0x01, // Version and flags (version 1, no flags)
                0x1B, // Prologue size
                0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
                0x05, // Frame register + offset (RBP with 0 offset)
                0x04, // Unwind code count (3 for stack alloc, 1 for push reg)
                0x00, // Frame register + offset (no frame register)
                0x1B, // Prolog offset
                0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
                0x40, // Byte 1 of size
                0x42, // Byte 2 of size
                0x0F, // Byte 3 of size
                0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
                0x05, // Prolog offset
                0x03, // Operation 3 (save frame register), stack pointer offset = 0
                0x02, // Prolog offset
                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
                0x00, // Padding byte
                0x00, // Padding byte
            ]
        );
    }

@@ -19,10 +19,24 @@ use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::{I32, I64};
use crate::ir::{self, InstBuilder, MemFlags};
use crate::isa::TargetIsa;

#[cfg(any(
    feature = "x86",
    feature = "arm32",
    feature = "arm64",
    feature = "riscv"
))]
use crate::predicates;
#[cfg(any(
    feature = "x86",
    feature = "arm32",
    feature = "arm64",
    feature = "riscv"
))]
use alloc::vec::Vec;

use crate::timing;
use alloc::collections::BTreeSet;
use alloc::vec::Vec;

mod boundary;
mod call;

@@ -1,5 +1,6 @@
//! ABI definitions.

use crate::binemit::Stackmap;
use crate::ir::{ArgumentExtension, StackSlot};
use crate::machinst::*;
use crate::settings;

@@ -100,6 +101,15 @@ pub trait ABIBody {
    /// Store to a spillslot.
    fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;

    /// Generate a stackmap, given a list of spillslots and the emission state
    /// at a given program point (prior to emission of the safepointing
    /// instruction).
    fn spillslots_to_stackmap(
        &self,
        slots: &[SpillSlot],
        state: &<Self::I as MachInstEmit>::State,
    ) -> Stackmap;

    /// Generate a prologue, post-regalloc. This should include any stack
    /// frame or other setup necessary to use the other methods (`load_arg`,
    /// `store_retval`, and spillslot accesses.) `self` is mutable so that we

@@ -113,21 +123,34 @@ pub trait ABIBody {
    /// likely closely related.
    fn gen_epilogue(&self) -> Vec<Self::I>;

    /// Returns the full frame size for the given function, after prologue emission has run. This
    /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
    /// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
    /// This is used for ABI variants where the client generates prologue/epilogue code, as in
    /// Baldrdash (SpiderMonkey integration).
    /// Returns the full frame size for the given function, after prologue
    /// emission has run. This comprises the spill slots and stack-storage slots
    /// (but not storage for clobbered callee-save registers, arguments pushed
    /// at callsites within this function, or other ephemeral pushes). This is
    /// used for ABI variants where the client generates prologue/epilogue code,
    /// as in Baldrdash (SpiderMonkey integration).
    fn frame_size(&self) -> u32;

    /// Returns the size of arguments expected on the stack.
    fn stack_args_size(&self) -> u32;

    /// Get the spill-slot size.
    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;

    /// Generate a spill.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I;
    /// Generate a spill. The type, if known, is given; this can be used to
    /// generate a store instruction optimized for the particular type rather
    /// than the RegClass (e.g., only F64 that resides in a V128 register). If
    /// no type is given, the implementation should spill the whole register.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;

    /// Generate a reload (fill). As for spills, the type may be given to allow
    /// a more optimized load instruction to be generated.
    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> Self::I;
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        ty: Option<Type>,
    ) -> Self::I;
}
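
The motivation for the new Option<Type> parameter: when the spilled value's type is known, a backend can emit a narrower store (e.g., an 8-byte store for an F64 living in a 128-bit vector register); when it is not, it must conservatively save the whole register. A toy illustration of that dispatch — not the actual ABI implementation, and the type names are stand-ins:

// Toy model of choosing a spill-store width from an optional type, for a
// value held in a 128-bit vector register.
enum Ty {
    F64,
    I8X16,
}

fn spill_store_bytes(ty: Option<Ty>) -> u32 {
    match ty {
        Some(Ty::F64) => 8,   // narrower store suffices
        Some(Ty::I8X16) => 16,
        None => 16,           // unknown: spill the whole register
    }
}

fn main() {
    assert_eq!(spill_store_bytes(Some(Ty::F64)), 8);
    assert_eq!(spill_store_bytes(None), 16);
}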

/// Trait implemented by an object that tracks ABI-related state and can

@@ -140,7 +140,7 @@
//! Given these invariants, we argue why each optimization preserves execution
//! semantics below (grep for "Preserves execution semantics").

use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, Stackmap};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst};

@@ -168,6 +168,8 @@ pub struct MachBuffer<I: VCodeInst> {
    call_sites: SmallVec<[MachCallSite; 16]>,
    /// Any source location mappings referring to this code.
    srclocs: SmallVec<[MachSrcLoc; 64]>,
    /// Any stackmaps referring to this code.
    stackmaps: SmallVec<[MachStackMap; 8]>,
    /// The current source location in progress (after `start_srcloc()` and
    /// before `end_srcloc()`). This is a (start_offset, src_loc) tuple.
    cur_srcloc: Option<(CodeOffset, SourceLoc)>,

@@ -228,6 +230,8 @@ pub struct MachBufferFinalized {
    call_sites: SmallVec<[MachCallSite; 16]>,
    /// Any source location mappings referring to this code.
    srclocs: SmallVec<[MachSrcLoc; 64]>,
    /// Any stackmaps referring to this code.
    stackmaps: SmallVec<[MachStackMap; 8]>,
}

static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff;

@@ -262,6 +266,7 @@ impl<I: VCodeInst> MachBuffer<I> {
            traps: SmallVec::new(),
            call_sites: SmallVec::new(),
            srclocs: SmallVec::new(),
            stackmaps: SmallVec::new(),
            cur_srcloc: None,
            label_offsets: SmallVec::new(),
            label_aliases: SmallVec::new(),

@@ -1090,6 +1095,7 @@ impl<I: VCodeInst> MachBuffer<I> {
            traps: self.traps,
            call_sites: self.call_sites,
            srclocs: self.srclocs,
            stackmaps: self.stackmaps,
        }
    }

@@ -1149,6 +1155,22 @@ impl<I: VCodeInst> MachBuffer<I> {
            self.srclocs.push(MachSrcLoc { start, end, loc });
        }
    }

    /// Add stackmap metadata for this program point: a set of stack offsets
    /// (from SP upward) that contain live references.
    ///
    /// The `offset_to_fp` value is the offset between the nominal SP (at which
    /// the `stack_offsets` are based) and the FP value. By subtracting
    /// `offset_to_fp` from each `stack_offsets` element, one can obtain
    /// live-reference offsets from FP instead.
    pub fn add_stackmap(&mut self, insn_len: CodeOffset, stackmap: Stackmap) {
        let offset = self.cur_offset();
        self.stackmaps.push(MachStackMap {
            offset,
            offset_end: offset + insn_len,
            stackmap,
        });
    }
}

impl MachBufferFinalized {
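
A worked example of the conversion the add_stackmap comment describes: with SP-relative live-reference offsets [8, 24] and the FP sitting 32 bytes above the nominal SP, the FP-relative offsets come out as [-24, -8]. A plain-arithmetic sketch (the real Stackmap stores a bitmap of stack words, not a list of offsets):

// Convert SP-relative stack offsets of live references to FP-relative
// offsets by subtracting the SP-to-FP distance.
fn to_fp_relative(stack_offsets: &[i64], offset_to_fp: i64) -> Vec<i64> {
    stack_offsets.iter().map(|&off| off - offset_to_fp).collect()
}

fn main() {
    assert_eq!(to_fp_relative(&[8, 24], 32), vec![-24, -8]);
}
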
@@ -1207,6 +1229,11 @@ impl MachBufferFinalized {
        sink.begin_rodata();
        sink.end_codegen();
    }

    /// Get the stackmap metadata for this code.
    pub fn stackmaps(&self) -> &[MachStackMap] {
        &self.stackmaps[..]
    }
}

/// A constant that is deferred to the next constant-pool opportunity.

@@ -1286,6 +1313,19 @@ pub struct MachSrcLoc {
    pub loc: SourceLoc,
}

/// Record of stackmap metadata: stack offsets containing references.
#[derive(Clone, Debug)]
pub struct MachStackMap {
    /// The code offset at which this stackmap applies.
    pub offset: CodeOffset,
    /// The code offset just past the "end" of the instruction: that is, the
    /// offset of the first byte of the following instruction, or equivalently,
    /// the start offset plus the instruction length.
    pub offset_end: CodeOffset,
    /// The Stackmap itself.
    pub stackmap: Stackmap,
}

/// Record of branch instruction in the buffer, to facilitate editing.
#[derive(Clone, Debug)]
struct MachBranch {

@@ -1390,7 +1430,9 @@ mod test {
        inst.emit(&mut buf, &flags, &mut state);

        buf.bind_label(label(1));
        let inst = Inst::Nop4;
        let inst = Inst::Udf {
            trap_info: (SourceLoc::default(), TrapCode::Interrupt),
        };
        inst.emit(&mut buf, &flags, &mut state);

        buf.bind_label(label(2));

@@ -1403,14 +1445,13 @@ mod test {

        let mut buf2 = MachBuffer::new();
        let mut state = Default::default();
        let inst = Inst::OneWayCondBr {
            kind: CondBrKind::Zero(xreg(0)),
            target: BranchTarget::ResolvedOffset(8),
        let inst = Inst::TrapIf {
            kind: CondBrKind::NotZero(xreg(0)),
            trap_info: (SourceLoc::default(), TrapCode::Interrupt),
        };
        inst.emit(&mut buf2, &flags, &mut state);
        let inst = Inst::Nop4;
        inst.emit(&mut buf2, &flags, &mut state);
        inst.emit(&mut buf2, &flags, &mut state);

        let buf2 = buf2.finish();

@@ -23,7 +23,7 @@ where
    // Build the lowering context.
    let lower = Lower::new(f, abi, block_order)?;
    // Lower the IR.
    let mut vcode = lower.lower(b)?;
    let (mut vcode, stackmap_request_info) = lower.lower(b)?;

    debug!(
        "vcode from lowering: \n{}",

@@ -57,11 +57,23 @@ where
        }
    }

    // If there are no reference-typed values, or there are but no safepoints
    // at which we need to know about them, then we don't need stackmaps.
    let sri = if stackmap_request_info.reftyped_vregs.len() > 0
        && stackmap_request_info.safepoint_insns.len() > 0
    {
        Some(&stackmap_request_info)
    } else {
        None
    };

    let result = {
        let _tt = timing::regalloc();
        allocate_registers_with_opts(
            &mut vcode,
            b.reg_universe(),
            sri,
            Options {
                run_checker,
                algorithm,

@@ -17,7 +17,7 @@ use crate::machinst::{
};
use crate::CodegenResult;

use regalloc::{Reg, RegClass, VirtualReg, Writable};
use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable};

use alloc::boxed::Box;
use alloc::vec::Vec;

@@ -146,6 +146,8 @@ pub trait LowerCtx {
    fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: Self::I);
    /// Emit a machine instruction that is a safepoint.
    fn emit_safepoint(&mut self, mach_inst: Self::I);
    /// Indicate that the given input uses the register returned by
    /// `get_input()`. Codegen may not happen otherwise for the producing
    /// instruction if it has no side effects and no uses.

@@ -206,6 +208,14 @@ pub trait LowerBackend {
    }
}

/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
    loc: SourceLoc,
    is_safepoint: bool,
    inst: I,
}

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {

@@ -237,17 +247,17 @@ pub struct Lower<'func, I: VCodeInst> {
    next_vreg: u32,

    /// Insts in reverse block order, before final copy to vcode.
    block_insts: Vec<(SourceLoc, I)>,
    block_insts: Vec<InstTuple<I>>,

    /// Ranges in `block_insts` constituting BBs.
    block_ranges: Vec<(usize, usize)>,

    /// Instructions collected for the BB in progress, in reverse order, with
    /// source-locs attached.
    bb_insts: Vec<(SourceLoc, I)>,
    bb_insts: Vec<InstTuple<I>>,

    /// Instructions collected for the CLIF inst in progress, in forward order.
    ir_insts: Vec<I>,
    ir_insts: Vec<InstTuple<I>>,

    /// The register to use for GetPinnedReg, if any, on this architecture.
    pinned_reg: Option<Reg>,

@@ -276,6 +286,7 @@ fn alloc_vreg(
        let v = *next_vreg;
        *next_vreg += 1;
        value_regs[value] = Reg::new_virtual(regclass, v);
        debug!("value {} gets vreg {:?}", value, v);
    }
    value_regs[value].as_virtual_reg().unwrap()
}

@@ -579,15 +590,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    fn finish_ir_inst(&mut self, loc: SourceLoc) {
        for inst in self.ir_insts.drain(..).rev() {
            self.bb_insts.push((loc, inst));
        // `bb_insts` is kept in reverse order, so emit the instructions in
        // reverse order.
        for mut tuple in self.ir_insts.drain(..).rev() {
            tuple.loc = loc;
            self.bb_insts.push(tuple);
        }
    }

    fn finish_bb(&mut self) {
        let start = self.block_insts.len();
        for pair in self.bb_insts.drain(..).rev() {
            self.block_insts.push(pair);
        for tuple in self.bb_insts.drain(..).rev() {
            self.block_insts.push(tuple);
        }
        let end = self.block_insts.len();
        self.block_ranges.push((start, end));

@@ -595,9 +609,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> {

    fn copy_bbs_to_vcode(&mut self) {
        for &(start, end) in self.block_ranges.iter().rev() {
            for &(loc, ref inst) in &self.block_insts[start..end] {
            for &InstTuple {
                loc,
                is_safepoint,
                ref inst,
            } in &self.block_insts[start..end]
            {
                self.vcode.set_srcloc(loc);
                self.vcode.push(inst.clone());
                self.vcode.push(inst.clone(), is_safepoint);
            }
            self.vcode.end_bb();
        }

@@ -645,7 +664,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
    pub fn lower<B: LowerBackend<MInst = I>>(
        mut self,
        backend: &B,
    ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
        debug!("about to lower function: {:?}", self.f);

        // Initialize the ABI object, giving it a temp if requested.

@@ -730,10 +752,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        self.copy_bbs_to_vcode();

        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
        let vcode = self.vcode.build();
        let (vcode, stackmap_info) = self.vcode.build();
        debug!("built vcode: {:?}", vcode);

        Ok(vcode)
        Ok((vcode, stackmap_info))
    }

    /// Get the actual inputs for a value. This is the implementation for

@@ -916,7 +938,19 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
    }

    fn emit(&mut self, mach_inst: I) {
        self.ir_insts.push(mach_inst);
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: false,
            inst: mach_inst,
        });
    }

    fn emit_safepoint(&mut self, mach_inst: I) {
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: true,
            inst: mach_inst,
        });
    }

    fn use_input_reg(&mut self, input: LowerInput) {

@@ -96,7 +96,7 @@
//!
//! ```

use crate::binemit::{CodeInfo, CodeOffset};
use crate::binemit::{CodeInfo, CodeOffset, Stackmap};
use crate::ir::condcodes::IntCC;
use crate::ir::{Function, Type};
use crate::result::CodegenResult;

@@ -191,6 +191,10 @@ pub trait MachInst: Clone + Debug {
    /// What is the worst-case instruction size emitted by this instruction type?
    fn worst_case_size() -> CodeOffset;

    /// What is the register class used for reference types (GC-observable pointers)? Can
    /// be dependent on compilation flags.
    fn ref_type_regclass(_flags: &Flags) -> RegClass;

    /// A label-use kind: a type that describes the types of label references that
    /// can occur in an instruction.
    type LabelUse: MachInstLabelUse;

@@ -256,9 +260,21 @@ pub enum MachTerminator<'a> {
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
    /// Persistent state carried across `emit` invocations.
    type State: Default + Clone + Debug;
    type State: MachInstEmitState<Self>;
    /// Emit the instruction.
    fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
    /// Pretty-print the instruction.
    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
}

/// A trait describing the emission state carried between MachInsts when
/// emitting a function body.
pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
    /// Create a new emission state given the ABI object.
    fn new(abi: &dyn ABIBody<I = I>) -> Self;
    /// Update the emission state before emitting an instruction that is a
    /// safepoint.
    fn pre_safepoint(&mut self, _stackmap: Stackmap) {}
}

/// The result of a `MachBackend::compile_function()` call. Contains machine
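
One way to read the new contract: the emission loop in vcode.rs (below) calls pre_safepoint with a stackmap just before emitting a safepoint instruction, and that instruction's own emit routine then consumes it. A toy mock of that handshake, with simplified stand-in types rather than the real trait bounds:

// Toy mock of the emission-state handshake for safepoints: the driver
// stashes a stackmap in the state; the next instruction's emit() takes it.
#[derive(Clone, Debug, Default)]
struct Stackmap(Vec<u32>); // offsets of live references (simplified)

#[derive(Default)]
struct EmitState {
    pending_stackmap: Option<Stackmap>,
}

impl EmitState {
    fn pre_safepoint(&mut self, stackmap: Stackmap) {
        self.pending_stackmap = Some(stackmap);
    }
    // Called from an instruction's emit(): consume the pending map, if any.
    fn take_stackmap(&mut self) -> Option<Stackmap> {
        self.pending_stackmap.take()
    }
}

fn main() {
    let mut state = EmitState::default();
    state.pre_safepoint(Stackmap(vec![0, 8]));
    assert!(state.take_stackmap().is_some());
    assert!(state.take_stackmap().is_none()); // consumed exactly once
}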

@@ -17,14 +17,15 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir::{self, SourceLoc};
use crate::ir::{self, types, SourceLoc};
use crate::machinst::*;
use crate::settings;

use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{
    BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper,
    BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
    StackmapRequestInfo,
};

use alloc::boxed::Box;

@@ -56,6 +57,9 @@ pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Do we have any ref values among our vregs?
    have_ref_values: bool,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

@@ -82,6 +86,16 @@ pub struct VCode<I: VCodeInst> {

    /// ABI object.
    abi: Box<dyn ABIBody<I = I>>,

    /// Safepoint instruction indices. Filled in post-regalloc. (Prior to
    /// regalloc, the safepoint instructions are listed in the
    /// `StackmapRequestInfo` held separately from the `VCode`.)
    safepoint_insns: Vec<InsnIndex>,

    /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s.
    /// These are used to generate actual stackmaps at emission. Filled in
    /// post-regalloc.
    safepoint_slots: Vec<Vec<SpillSlot>>,
}

/// A builder for a VCode function body. This builder is designed for the

@@ -102,6 +116,9 @@ pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    vcode: VCode<I>,

    /// In-progress stackmap-request info.
    stackmap_info: StackmapRequestInfo,

    /// Index of the last block-start in the vcode.
    block_start: InsnIndex,

@@ -115,9 +132,17 @@
impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
        let reftype_class = I::ref_type_regclass(abi.flags());
        let vcode = VCode::new(abi, block_order);
        let stackmap_info = StackmapRequestInfo {
            reftype_class,
            reftyped_vregs: vec![],
            safepoint_insns: vec![],
        };

        VCodeBuilder {
            vcode,
            stackmap_info,
            block_start: 0,
            succ_start: 0,
            cur_srcloc: SourceLoc::default(),

@@ -142,6 +167,15 @@ impl<I: VCodeInst> VCodeBuilder<I> {
                .resize(vreg.get_index() + 1, ir::types::I8);
        }
        self.vcode.vreg_types[vreg.get_index()] = ty;
        if is_reftype(ty) {
            self.stackmap_info.reftyped_vregs.push(vreg);
            self.vcode.have_ref_values = true;
        }
    }

    /// Are there any reference-typed values at all among the vregs?
    pub fn have_ref_values(&self) -> bool {
        self.vcode.have_ref_values()
    }

    /// Set the current block as the entry block.

@@ -166,7 +200,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
    }

    /// Push an instruction for the current BB and current IR inst within the BB.
    pub fn push(&mut self, insn: I) {
    pub fn push(&mut self, insn: I, is_safepoint: bool) {
        match insn.is_term() {
            MachTerminator::None | MachTerminator::Ret => {}
            MachTerminator::Uncond(target) => {

@@ -186,6 +220,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
        }
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(self.cur_srcloc);
        if is_safepoint {
            self.stackmap_info
                .safepoint_insns
                .push(InstIx::new((self.vcode.insts.len() - 1) as u32));
        }
    }

    /// Get the current source location.

@@ -198,21 +237,16 @@ impl<I: VCodeInst> VCodeBuilder<I> {
        self.cur_srcloc = srcloc;
    }

    /// Build the final VCode.
    pub fn build(self) -> VCode<I> {
        self.vcode
    /// Build the final VCode, returning the vcode itself as well as auxiliary
    /// information, such as the stackmap request information.
    pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
        // TODO: come up with an abstraction for "vcode and auxiliary data". The
        // auxiliary data needs to be separate from the vcode so that it can be
        // referenced as the vcode is mutated (e.g. by the register allocator).
        (self.vcode, self.stackmap_info)
    }
}

fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
    let v = indices
        .iter()
        .map(|iix| iix.get() as usize)
        .chain(iter::once(len))
        .collect::<Vec<usize>>();
    v.windows(2).map(|p| (p[0], p[1])).collect()
}

fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    if let Some((to, from)) = insn.is_move() {
        to.to_reg() == from

@@ -221,6 +255,11 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    }
}

/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
    ty == types::R64 || ty == types::R32
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {

@@ -228,6 +267,7 @@ impl<I: VCodeInst> VCode<I> {
            liveins: abi.liveins(),
            liveouts: abi.liveouts(),
            vreg_types: vec![],
            have_ref_values: false,
            insts: vec![],
            srclocs: vec![],
            entry: 0,

@@ -236,6 +276,8 @@ impl<I: VCodeInst> VCode<I> {
            block_succs: vec![],
            block_order,
            abi,
            safepoint_insns: vec![],
            safepoint_slots: vec![],
        }
    }

@@ -249,6 +291,11 @@ impl<I: VCodeInst> VCode<I> {
        self.vreg_types[vreg.get_index()]
    }

    /// Are there any reference-typed values at all among the vregs?
    pub fn have_ref_values(&self) -> bool {
        self.have_ref_values
    }

    /// Get the entry block.
    pub fn entry(&self) -> BlockIndex {
        self.entry

@@ -265,6 +312,11 @@ impl<I: VCodeInst> VCode<I> {
        self.abi.frame_size()
    }

    /// Inbound stack-args size.
    pub fn stack_args_size(&self) -> u32 {
        self.abi.stack_args_size()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
        let (start, end) = self.block_succ_range[block as usize];

@@ -281,17 +333,21 @@ impl<I: VCodeInst> VCode<I> {
        self.abi
            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));

        // We want to move instructions over in final block order, using the new
        // block-start map given by the regalloc.
        let block_ranges: Vec<(usize, usize)> =
            block_ranges(result.target_map.elems(), result.insns.len());
        let mut final_insns = vec![];
        let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
        let mut final_srclocs = vec![];
        let mut final_safepoint_insns = vec![];
        let mut safept_idx = 0;

        assert!(result.target_map.elems().len() == self.num_blocks());
        for block in 0..self.num_blocks() {
            let start = result.target_map.elems()[block].get() as usize;
            let end = if block == self.num_blocks() - 1 {
                result.insns.len()
            } else {
                result.target_map.elems()[block + 1].get() as usize
            };
            let block = block as BlockIndex;
            let (start, end) = block_ranges[block as usize];
            let final_start = final_insns.len() as InsnIndex;

            if block == self.entry {

@@ -333,6 +389,16 @@ impl<I: VCodeInst> VCode<I> {
                final_insns.push(insn.clone());
                final_srclocs.push(srcloc);
            }

            // Was this instruction a safepoint instruction? Add its final
            // index to the safepoint insn-index list if so.
            if safept_idx < result.new_safepoint_insns.len()
                && (result.new_safepoint_insns[safept_idx].get() as usize) == i
            {
                let idx = final_insns.len() - 1;
                final_safepoint_insns.push(idx as InsnIndex);
                safept_idx += 1;
            }
        }

        let final_end = final_insns.len() as InsnIndex;

@@ -344,6 +410,12 @@ impl<I: VCodeInst> VCode<I> {
        self.insts = final_insns;
        self.srclocs = final_srclocs;
        self.block_ranges = final_block_ranges;
        self.safepoint_insns = final_safepoint_insns;

        // Save safepoint slot-lists. These will be passed to the `EmitState`
        // for the machine backend during emission so that it can do
        // target-specific translations of slot numbers to stack offsets.
        self.safepoint_slots = result.stackmaps;
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external

@@ -353,11 +425,12 @@ impl<I: VCodeInst> VCode<I> {
        I: MachInstEmit,
    {
        let mut buffer = MachBuffer::new();
        let mut state = Default::default();
        let mut state = I::State::new(&*self.abi);

        buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.

        let flags = self.abi.flags();
        let mut safepoint_idx = 0;
        let mut cur_srcloc = None;
        for block in 0..self.num_blocks() {
            let block = block as BlockIndex;

@@ -381,6 +454,19 @@ impl<I: VCodeInst> VCode<I> {
                    cur_srcloc = Some(srcloc);
                }

                if safepoint_idx < self.safepoint_insns.len()
                    && self.safepoint_insns[safepoint_idx] == iix
                {
                    if self.safepoint_slots[safepoint_idx].len() > 0 {
                        let stackmap = self.abi.spillslots_to_stackmap(
                            &self.safepoint_slots[safepoint_idx][..],
                            &state,
                        );
                        state.pre_safepoint(stackmap);
                    }
                    safepoint_idx += 1;
                }

                self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
            }

@@ -476,13 +562,18 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
        self.abi.get_spillslot_size(regclass, ty)
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
        let ty = vreg.map(|v| self.vreg_type(v));
        self.abi.gen_spill(to_slot, from_reg, ty)
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
        vreg: Option<VirtualReg>,
    ) -> I {
        let ty = vreg.map(|v| self.vreg_type(v));
        self.abi.gen_reload(to_reg, from_slot, ty)
    }

@@ -531,7 +622,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}

/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        use std::fmt::Write;

@@ -539,6 +630,8 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
        write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
        write!(&mut s, "  Entry block: {}\n", self.entry).unwrap();

        let mut state = Default::default();
        let mut safepoint_idx = 0;
        for i in 0..self.num_blocks() {
            let block = i as BlockIndex;

@@ -552,11 +645,22 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
            let (start, end) = self.block_ranges[block as usize];
            write!(&mut s, "  (instruction range: {} .. {})\n", start, end).unwrap();
            for inst in start..end {
                if safepoint_idx < self.safepoint_insns.len()
                    && self.safepoint_insns[safepoint_idx] == inst
                {
                    write!(
                        &mut s,
                        "    (safepoint: slots {:?} with EmitState {:?})\n",
                        self.safepoint_slots[safepoint_idx], state,
                    )
                    .unwrap();
                    safepoint_idx += 1;
                }
                write!(
                    &mut s,
                    "  Inst {}: {}\n",
                    inst,
                    self.insts[inst as usize].show_rru(mb_rru)
                    self.insts[inst as usize].pretty_print(mb_rru, &mut state)
                )
                .unwrap();
            }

@@ -191,3 +191,9 @@
(=> (when (udiv_imm $C $x)
          (is-power-of-two $C))
    (ushr_imm $(log2 $C) $x))

;; Remainder by a power of two -> bitwise AND with the constant decreased by one.
(=> (when (urem_imm $C $x)
          (is-power-of-two $C)
          (fits-in-native-word $C))
    (band_imm $(isub $C 1) $x))
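
The new rule relies on the identity x % c == x & (c - 1), which holds for unsigned x whenever c is a power of two. A quick check in plain Rust:

// x % c == x & (c - 1) when c is a power of two (unsigned arithmetic),
// which is what the urem_imm -> band_imm rewrite above exploits.
fn main() {
    for &c in &[1u64, 2, 8, 1 << 20] {
        assert!(c.is_power_of_two());
        for &x in &[0u64, 1, 7, 1234567, u64::MAX] {
            assert_eq!(x % c, x & (c - 1));
        }
    }
}
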
Binary file not shown.

@@ -1,6 +1,7 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{Function, InstBuilder, Opcode};
use crate::inst_predicates::is_safepoint;
use crate::ir::{Function, InstBuilder};
use crate::isa::TargetIsa;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;

@@ -51,12 +52,8 @@ pub fn emit_stackmaps(
        pos.goto_top(block);

        while let Some(inst) = pos.next_inst() {
            if pos.func.dfg[inst].opcode().is_resumable_trap() {
            if is_safepoint(&pos.func, inst) {
                insert_and_encode_safepoint(&mut pos, tracker, isa);
            } else if pos.func.dfg[inst].opcode().is_call() {
                insert_and_encode_safepoint(&mut pos, tracker, isa);
            } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint {
                panic!("safepoint instruction can only be used by the compiler!");
            }

            // Process the instruction and get rid of dead values.

@@ -1 +1 @@
{"files":{"Cargo.toml":"49bb9e126a98fa9d3d61a69ffaf24d66bab5b65c87f607fdc809a0c68ed607cb","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"0ac209bc13b1152b67c8ab3e0a87ab512d966367758cc7fa131096dbe97a1da8","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.65.0"
version = "0.66.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

@@ -1 +1 @@
{"files":{"Cargo.toml":"6c9d8563161a9803e876842482a1c34fd0ea740d5a7141fc51cec3c21ef60eec","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"52587586762dcb18c8ae39de76ef388a78b857d8fecd87b77b6a30dc8f85e1f5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"114e1ff1e5eacaf3c79946fcf441a8f525148a50e94a3f81373d4b745ac09a9f","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-frontend"
version = "0.65.0"
version = "0.66.0"
description = "Cranelift IR builder helper"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-frontend"

@@ -11,7 +11,7 @@ readme = "README.md"
edition = "2018"

[dependencies]
cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
hashbrown = { version = "0.7", optional = true }

@@ -1,11 +1,12 @@
use super::HashMap;
use crate::frontend::FunctionBuilder;
use alloc::vec::Vec;
use core::convert::TryFrom;
use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::*;
use log::debug;

type EntryIndex = u64;
type EntryIndex = u128;

/// Unlike with `br_table`, `Switch` cases may be sparse or non-0-based.
/// They emit efficient code using branches, jump tables, or a combination of both.

@@ -152,11 +153,9 @@ impl Switch {
        let left_block = bx.create_block();
        let right_block = bx.create_block();

        let should_take_right_side = bx.ins().icmp_imm(
            IntCC::UnsignedGreaterThanOrEqual,
            val,
            right[0].first_index as i64,
        );
        let first_index = right[0].first_index;
        let should_take_right_side =
            icmp_imm_u128(bx, IntCC::UnsignedGreaterThanOrEqual, val, first_index);
        bx.ins().brnz(should_take_right_side, right_block, &[]);
        bx.ins().jump(left_block, &[]);

@@ -200,7 +199,7 @@ impl Switch {
            }
            (1, _) => {
                ins_fallthrough_jump(was_branch, bx);
                let is_good_val = bx.ins().icmp_imm(IntCC::Equal, val, first_index as i64);
                let is_good_val = icmp_imm_u128(bx, IntCC::Equal, val, first_index);
                bx.ins().brnz(is_good_val, blocks[0], &[]);
            }
            (_, 0) => {

@@ -217,11 +216,8 @@ impl Switch {
            (_, _) => {
                ins_fallthrough_jump(was_branch, bx);
                let jt_block = bx.create_block();
                let is_good_val = bx.ins().icmp_imm(
                    IntCC::UnsignedGreaterThanOrEqual,
                    val,
                    first_index as i64,
                );
                let is_good_val =
                    icmp_imm_u128(bx, IntCC::UnsignedGreaterThanOrEqual, val, first_index);
                bx.ins().brnz(is_good_val, jt_block, &[]);
                bx.seal_block(jt_block);
                cases_and_jt_blocks.push((first_index, jt_block, blocks));

@@ -241,6 +237,13 @@ impl Switch {
        cases_and_jt_blocks: Vec<(EntryIndex, Block, Vec<Block>)>,
    ) {
        for (first_index, jt_block, blocks) in cases_and_jt_blocks.into_iter().rev() {
            // There are currently no 128-bit systems supported by rustc, but once there are,
            // this ensures we don't silently ignore a part of the jump table for 128-bit
            // integers on 128-bit systems.
            assert!(
                u64::try_from(blocks.len()).is_ok(),
                "Jump tables bigger than 2^64-1 are not yet supported"
            );

            let mut jt_data = JumpTableData::new();
            for block in blocks {
                jt_data.push_entry(block);

@@ -251,8 +254,33 @@ impl Switch {
            let discr = if first_index == 0 {
                val
            } else {
                bx.ins().iadd_imm(val, (first_index as i64).wrapping_neg())
                if let Ok(first_index) = u64::try_from(first_index) {
                    bx.ins().iadd_imm(val, (first_index as i64).wrapping_neg())
                } else {
                    let (lsb, msb) = (first_index as u64, (first_index >> 64) as u64);
                    let lsb = bx.ins().iconst(types::I64, lsb as i64);
                    let msb = bx.ins().iconst(types::I64, msb as i64);
                    let index = bx.ins().iconcat(lsb, msb);
                    bx.ins().isub(val, index)
                }
            };

            let discr = if bx.func.dfg.value_type(discr).bits() > 64 {
                // Check for overflow of cast to u64.
                let new_block = bx.create_block();
                let bigger_than_u64 =
                    bx.ins()
                        .icmp_imm(IntCC::UnsignedGreaterThan, discr, u64::max_value() as i64);
                bx.ins().brnz(bigger_than_u64, otherwise, &[]);
                bx.ins().jump(new_block, &[]);
                bx.switch_to_block(new_block);

                // Cast to u64, as br_table is not implemented for integers bigger than 64bits.
                bx.ins().ireduce(types::I64, discr)
            } else {
                discr
            };

            bx.ins().br_table(discr, otherwise, jump_table);
        }
    }

@@ -278,6 +306,18 @@ impl Switch {
    }
}

fn icmp_imm_u128(bx: &mut FunctionBuilder, cond: IntCC, x: Value, y: u128) -> Value {
    if let Ok(index) = u64::try_from(y) {
        bx.ins().icmp_imm(cond, x, index as i64)
    } else {
        let (lsb, msb) = (y as u64, (y >> 64) as u64);
        let lsb = bx.ins().iconst(types::I64, lsb as i64);
        let msb = bx.ins().iconst(types::I64, msb as i64);
        let index = bx.ins().iconcat(lsb, msb);
        bx.ins().icmp(cond, x, index)
    }
}
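
icmp_imm_u128 works because a u128 splits losslessly into two u64 halves, which iconcat reassembles. The underlying arithmetic, checked in plain Rust:

// A u128 immediate is carried as (low, high) u64 halves, mirroring what
// icmp_imm_u128 materializes with two iconst instructions plus iconcat.
fn split(y: u128) -> (u64, u64) {
    (y as u64, (y >> 64) as u64)
}

fn join(lsb: u64, msb: u64) -> u128 {
    ((msb as u128) << 64) | lsb as u128
}

fn main() {
    for &y in &[0u128, 1, u64::MAX as u128, u128::MAX, 1 << 100] {
        let (lsb, msb) = split(y);
        assert_eq!(join(lsb, msb), y);
    }
}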

/// This represents a contiguous range of cases to switch on.
///
/// For example 10 => block1, 11 => block2, 12 => block7 will be represented as:

@@ -440,7 +480,7 @@ block10:

    #[test]
    fn switch_min_index_value() {
        let func = setup!(0, [::core::i64::MIN as u64, 1,]);
        let func = setup!(0, [::core::i64::MIN as u64 as u128, 1,]);
        assert_eq!(
            func,
            "block0:

@@ -459,7 +499,7 @@ block3:

    #[test]
    fn switch_max_index_value() {
        let func = setup!(0, [::core::i64::MAX as u64, 1,]);
        let func = setup!(0, [::core::i64::MAX as u64 as u128, 1,]);
        assert_eq!(
            func,
            "block0:

@@ -478,7 +518,7 @@ block3:

    #[test]
    fn switch_optimal_codegen() {
        let func = setup!(0, [-1i64 as u64, 0, 1,]);
        let func = setup!(0, [-1i64 as u64 as u128, 0, 1,]);
        assert_eq!(
            func,
            " jt0 = jump_table [block2, block3]

@@ -530,4 +570,45 @@ block4:

        builder.finalize(); // Will panic if some blocks are not sealed
    }

    #[test]
    fn switch_128bit() {
        let mut func = Function::new();
        let mut func_ctx = FunctionBuilderContext::new();
        {
            let mut bx = FunctionBuilder::new(&mut func, &mut func_ctx);
            let block0 = bx.create_block();
            bx.switch_to_block(block0);
            let val = bx.ins().iconst(types::I128, 0);
            let mut switch = Switch::new();
            let block1 = bx.create_block();
            switch.set_entry(1, block1);
            let block2 = bx.create_block();
            switch.set_entry(0, block2);
            let block3 = bx.create_block();
            switch.emit(&mut bx, val, block3);
        }
        let func = func
            .to_string()
            .trim_start_matches("function u0:0() fast {\n")
            .trim_end_matches("\n}\n")
            .to_string();
        assert_eq!(
            func,
            " jt0 = jump_table [block2, block1]

block0:
    v0 = iconst.i128 0
    jump block4

block4:
    v1 = icmp_imm.i128 ugt v0, -1
    brnz v1, block3
    jump block5

block5:
    v2 = ireduce.i64 v0
    br_table v2, block3, jt0"
        );
    }
}
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"6c9d8563161a9803e876842482a1c34fd0ea740d5a7141fc51cec3c21ef60eec","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"e8d525ae48f967ebda012981b10dd11fbb46d9223fd95d1e3409da528851fcf7","src/environ/dummy.rs":"922d029491a9f5c55d22fcc9fbeae9e8c6721fa6556527785494f1351874e9f3","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"026a145c1cf9cd25c77e7ea8e0bb43739769dfc4693fcf827f6cdb79acf398a1","src/func_translator.rs":"b4391a11df5c401c9ddd26698105548b7a861c8deb5f84215f0b88cba5327362","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"47b575f0edbe8a2a3334261742870ce7424e13d91f8980609f9c963a2811e1f6","src/sections_translator.rs":"ebd08548e048c7f792da45aa8d710e7d6f047e9197bc86260743c97d00dd99f6","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"0a2a53a7f60a5192661ce4c95ee9bd6775e1eb7d32647e1c6e026b0f8849cd2c","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
|
||||
{"files":{"Cargo.toml":"34ad61b3a40b5bfee68d575e749314cf8395484c0484fd40d39a9bd1f46d3e14","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"77d407a26746381c1f433e0d13e758bfd2864936fc156c5eb5114a7dc146a2f1","src/environ/dummy.rs":"e9b06d1db4d25ab622d133ca927ec524a86d90d49eb67862dea0db734a0eadc4","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"b2ead10ea1f346d6fe2e4a5afc656754f0783fae98a3937b42cc106ad9e5eace","src/func_translator.rs":"48ee25da11063743459f9e9407512413075265e67713c6f5ab733798be2bf19d","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"def8b0853f1e802faf57b38e90016577887a0698a5abce5b3cee4cd67e07ecf0","src/sections_translator.rs":"8bbf6cf774076c88f176296065b392ff21ed512be806629cce5d275271eee3a8","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"7ca3cb06b4481bc3ae74697fbcd437aea1d851eaa3cfe18cc013a4af43728957","src/translation_utils.rs":"69f20c47ea22f0badd21a6187b5f9764252a00d19643a7e3e691797a9fe34f1b","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
|
|
@@ -1,6 +1,6 @@
 [package]
 name = "cranelift-wasm"
-version = "0.65.0"
+version = "0.66.0"
 authors = ["The Cranelift Project Developers"]
 description = "Translator from WebAssembly to Cranelift IR"
 documentation = "https://docs.rs/cranelift-wasm"

@@ -12,10 +12,10 @@ keywords = ["webassembly", "wasm"]
 edition = "2018"

 [dependencies]
-wasmparser = { version = "0.58.0", default-features = false }
-cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false }
-cranelift-entity = { path = "../entity", version = "0.65.0" }
-cranelift-frontend = { path = "../frontend", version = "0.65.0", default-features = false }
+wasmparser = { version = "0.59.0", default-features = false }
+cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false }
+cranelift-entity = { path = "../entity", version = "0.66.0" }
+cranelift-frontend = { path = "../frontend", version = "0.66.0", default-features = false }
 hashbrown = { version = "0.7", optional = true }
 log = { version = "0.4.6", default-features = false }
 serde = { version = "1.0.94", features = ["derive"], optional = true }

@@ -25,7 +25,7 @@ thiserror = "1.0.4"
 wat = "1.0.18"
 target-lexicon = "0.10"
 # Enable the riscv feature for cranelift-codegen, as some tests require it
-cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false, features = ["riscv"] }
+cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false, features = ["riscv"] }

 [features]
 default = ["std"]
@@ -30,6 +30,7 @@ use crate::translation_utils::{
 };
 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex};
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use core::{i32, u32};
 use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
 use cranelift_codegen::ir::immediates::Offset32;

@@ -1039,8 +1040,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         Operator::F32Le | Operator::F64Le => {
             translate_fcmp(FloatCC::LessThanOrEqual, builder, state)
         }
-        Operator::RefNull { ty } => state.push1(environ.translate_ref_null(builder.cursor(), *ty)?),
-        Operator::RefIsNull { ty: _ } => {
+        Operator::RefNull { ty } => {
+            state.push1(environ.translate_ref_null(builder.cursor(), (*ty).try_into()?)?)
+        }
+        Operator::RefIsNull => {
            let value = state.pop1();
            state.push1(environ.translate_ref_is_null(builder.cursor(), value)?);
        }
@@ -1559,22 +1562,59 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
         }
-        Operator::I32x4TruncSatF32x4U
-        | Operator::I8x16NarrowI16x8S { .. }
-        | Operator::I8x16NarrowI16x8U { .. }
-        | Operator::I16x8NarrowI32x4S { .. }
-        | Operator::I16x8NarrowI32x4U { .. }
-        | Operator::I16x8WidenLowI8x16S { .. }
-        | Operator::I16x8WidenHighI8x16S { .. }
-        | Operator::I16x8WidenLowI8x16U { .. }
-        | Operator::I16x8WidenHighI8x16U { .. }
-        | Operator::I32x4WidenLowI16x8S { .. }
-        | Operator::I32x4WidenHighI16x8S { .. }
-        | Operator::I32x4WidenLowI16x8U { .. }
-        | Operator::I32x4WidenHighI16x8U { .. }
-        | Operator::I8x16Bitmask
-        | Operator::I16x8Bitmask
-        | Operator::I32x4Bitmask => {
+        Operator::I32x4TruncSatF32x4U => {
+            let a = pop1_with_bitcast(state, F32X4, builder);
+            state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
+        }
+        Operator::I8x16NarrowI16x8S => {
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().snarrow(a, b))
+        }
+        Operator::I16x8NarrowI32x4S => {
+            let (a, b) = pop2_with_bitcast(state, I32X4, builder);
+            state.push1(builder.ins().snarrow(a, b))
+        }
+        Operator::I8x16NarrowI16x8U => {
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().unarrow(a, b))
+        }
+        Operator::I16x8NarrowI32x4U => {
+            let (a, b) = pop2_with_bitcast(state, I32X4, builder);
+            state.push1(builder.ins().unarrow(a, b))
+        }
+        Operator::I16x8WidenLowI8x16S => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().swiden_low(a))
+        }
+        Operator::I16x8WidenHighI8x16S => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().swiden_high(a))
+        }
+        Operator::I16x8WidenLowI8x16U => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().uwiden_low(a))
+        }
+        Operator::I16x8WidenHighI8x16U => {
+            let a = pop1_with_bitcast(state, I8X16, builder);
+            state.push1(builder.ins().uwiden_high(a))
+        }
+        Operator::I32x4WidenLowI16x8S => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().swiden_low(a))
+        }
+        Operator::I32x4WidenHighI16x8S => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().swiden_high(a))
+        }
+        Operator::I32x4WidenLowI16x8U => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().uwiden_low(a))
+        }
+        Operator::I32x4WidenHighI16x8U => {
+            let a = pop1_with_bitcast(state, I16X8, builder);
+            state.push1(builder.ins().uwiden_high(a))
+        }
+        Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => {
             return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
         }
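For readers not steeped in Cranelift's vector instructions: snarrow/unarrow halve the lane width with signed or unsigned saturation, and the swiden_*/uwiden_* family doubles lane width by sign- or zero-extending one half of the input vector. A lane-wise scalar sketch of those assumed semantics (illustration only, not the Cranelift implementation):

// Signed saturating narrow (one i16 lane of `snarrow` on I16X8 -> i8).
fn snarrow_lane(x: i16) -> i8 {
    x.clamp(i8::MIN as i16, i8::MAX as i16) as i8
}

// Unsigned saturating narrow of a signed input (one lane of `unarrow`).
fn unarrow_lane(x: i16) -> u8 {
    x.clamp(0, u8::MAX as i16) as u8
}

// `swiden_low` on I8X16: sign-extend the low eight lanes to i16.
fn swiden_low_i8x16(v: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for (i, lane) in out.iter_mut().enumerate() {
        *lane = v[i] as i16;
    }
    out
}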
@@ -547,7 +547,7 @@ impl TargetEnvironment for DummyEnvironment {
 }

 impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
-    fn declare_signature(&mut self, _wasm: &WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
+    fn declare_signature(&mut self, _wasm: WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
         self.info.signatures.push(sig);
         Ok(())
     }
@@ -12,22 +12,89 @@ use crate::translation_utils::{
     Table, TableIndex,
 };
-use core::convert::From;
+use core::convert::TryFrom;
 use cranelift_codegen::cursor::FuncCursor;
 use cranelift_codegen::ir::immediates::Offset32;
 use cranelift_codegen::ir::{self, InstBuilder};
 use cranelift_codegen::isa::TargetFrontendConfig;
 use cranelift_frontend::FunctionBuilder;
 #[cfg(feature = "enable-serde")]
 use serde::{Deserialize, Serialize};
+use std::boxed::Box;
+use std::string::ToString;
 use thiserror::Error;
 use wasmparser::BinaryReaderError;
 use wasmparser::Operator;

-// Re-export `wasmparser`'s function and value types so that consumers can
-// associate this the original Wasm signature with each compiled function. This
-// is often necessary because while each Wasm signature gets compiled down into
-// a single native signature, multiple Wasm signatures might compile down into
-// the same native signature.
-pub use wasmparser::{FuncType as WasmFuncType, Type as WasmType};
+/// WebAssembly value type -- equivalent of `wasmparser`'s Type.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum WasmType {
+    /// I32 type
+    I32,
+    /// I64 type
+    I64,
+    /// F32 type
+    F32,
+    /// F64 type
+    F64,
+    /// V128 type
+    V128,
+    /// FuncRef type
+    FuncRef,
+    /// ExternRef type
+    ExternRef,
+}
+
+impl TryFrom<wasmparser::Type> for WasmType {
+    type Error = WasmError;
+    fn try_from(ty: wasmparser::Type) -> Result<Self, Self::Error> {
+        use wasmparser::Type::*;
+        match ty {
+            I32 => Ok(WasmType::I32),
+            I64 => Ok(WasmType::I64),
+            F32 => Ok(WasmType::F32),
+            F64 => Ok(WasmType::F64),
+            V128 => Ok(WasmType::V128),
+            FuncRef => Ok(WasmType::FuncRef),
+            ExternRef => Ok(WasmType::ExternRef),
+            EmptyBlockType | Func => Err(WasmError::InvalidWebAssembly {
+                message: "unexpected value type".to_string(),
+                offset: 0,
+            }),
+        }
+    }
+}
+
+/// WebAssembly function type -- equivalent of `wasmparser`'s FuncType.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct WasmFuncType {
+    /// Function params types.
+    pub params: Box<[WasmType]>,
+    /// Returns params types.
+    pub returns: Box<[WasmType]>,
+}
+
+impl TryFrom<wasmparser::FuncType> for WasmFuncType {
+    type Error = WasmError;
+    fn try_from(ty: wasmparser::FuncType) -> Result<Self, Self::Error> {
+        Ok(Self {
+            params: ty
+                .params
+                .into_vec()
+                .into_iter()
+                .map(WasmType::try_from)
+                .collect::<Result<_, Self::Error>>()?,
+            returns: ty
+                .returns
+                .into_vec()
+                .into_iter()
+                .map(WasmType::try_from)
+                .collect::<Result<_, Self::Error>>()?,
+        })
+    }
+}

 /// The value of a WebAssembly global variable.
 #[derive(Clone, Copy)]

@@ -524,7 +591,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment {
     /// Declares a function signature to the environment.
     fn declare_signature(
         &mut self,
-        wasm_func_type: &WasmFuncType,
+        wasm_func_type: WasmFuncType,
         sig: ir::Signature,
     ) -> WasmResult<()>;
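To make the new conversion path concrete: a signature coming out of wasmparser is now converted up front, with the non-value types surfacing as a WasmError. A minimal sketch, assuming the TryFrom impls above and cranelift-wasm's public WasmFuncType/WasmError types (the function name is hypothetical):

use core::convert::TryInto;

// Convert a parsed signature before handing it to declare_signature.
// EmptyBlockType/Func in params or returns becomes
// WasmError::InvalidWebAssembly, per the impl above.
fn lower_sig(ty: wasmparser::FuncType) -> Result<WasmFuncType, WasmError> {
    ty.try_into()
}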
@@ -9,6 +9,7 @@ use crate::environ::{FuncEnvironment, ReturnMode, WasmResult};
 use crate::state::{FuncTranslationState, ModuleTranslationState};
 use crate::translation_utils::get_vmctx_value_label;
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use cranelift_codegen::entity::EntityRef;
 use cranelift_codegen::ir::{self, Block, InstBuilder, ValueLabel};
 use cranelift_codegen::timing;

@@ -196,7 +197,9 @@ fn declare_locals<FE: FuncEnvironment + ?Sized>(
             let constant_handle = builder.func.dfg.constants.insert([0; 16].to_vec().into());
             builder.ins().vconst(ir::types::I8X16, constant_handle)
         }
-        ExternRef | FuncRef => environ.translate_ref_null(builder.cursor(), wasm_type)?,
+        ExternRef | FuncRef => {
+            environ.translate_ref_null(builder.cursor(), wasm_type.try_into()?)?
+        }
         ty => return Err(wasm_unsupported!("unsupported local type {:?}", ty)),
     };
@@ -2,13 +2,13 @@
 //! to deal with each part of it.
 use crate::environ::{ModuleEnvironment, WasmResult};
 use crate::sections_translator::{
-    parse_code_section, parse_data_section, parse_element_section, parse_export_section,
-    parse_function_section, parse_global_section, parse_import_section, parse_memory_section,
-    parse_name_section, parse_start_section, parse_table_section, parse_type_section,
+    parse_data_section, parse_element_section, parse_export_section, parse_function_section,
+    parse_global_section, parse_import_section, parse_memory_section, parse_name_section,
+    parse_start_section, parse_table_section, parse_type_section,
 };
 use crate::state::ModuleTranslationState;
 use cranelift_codegen::timing;
-use wasmparser::{CustomSectionContent, ModuleReader, SectionContent};
+use wasmparser::{NameSectionReader, Parser, Payload};

 /// Translate a sequence of bytes forming a valid Wasm binary into a list of valid Cranelift IR
 /// [`Function`](cranelift_codegen::ir::Function).

@@ -17,80 +17,85 @@ pub fn translate_module<'data>(
     environ: &mut dyn ModuleEnvironment<'data>,
 ) -> WasmResult<ModuleTranslationState> {
     let _tt = timing::wasm_translate_module();
-    let mut reader = ModuleReader::new(data)?;
     let mut module_translation_state = ModuleTranslationState::new();

-    while !reader.eof() {
-        let section = reader.read()?;
-        match section.content()? {
-            SectionContent::Type(types) => {
+    for payload in Parser::new(0).parse_all(data) {
+        match payload? {
+            Payload::Version { .. } | Payload::End => {}
+
+            Payload::TypeSection(types) => {
                 parse_type_section(types, &mut module_translation_state, environ)?;
             }

-            SectionContent::Import(imports) => {
+            Payload::ImportSection(imports) => {
                 parse_import_section(imports, environ)?;
             }

-            SectionContent::Function(functions) => {
+            Payload::FunctionSection(functions) => {
                 parse_function_section(functions, environ)?;
             }

-            SectionContent::Table(tables) => {
+            Payload::TableSection(tables) => {
                 parse_table_section(tables, environ)?;
             }

-            SectionContent::Memory(memories) => {
+            Payload::MemorySection(memories) => {
                 parse_memory_section(memories, environ)?;
             }

-            SectionContent::Global(globals) => {
+            Payload::GlobalSection(globals) => {
                 parse_global_section(globals, environ)?;
             }

-            SectionContent::Export(exports) => {
+            Payload::ExportSection(exports) => {
                 parse_export_section(exports, environ)?;
             }

-            SectionContent::Start(start) => {
-                parse_start_section(start, environ)?;
+            Payload::StartSection { func, .. } => {
+                parse_start_section(func, environ)?;
             }

-            SectionContent::Element(elements) => {
+            Payload::ElementSection(elements) => {
                 parse_element_section(elements, environ)?;
             }

-            SectionContent::Code(code) => {
-                parse_code_section(code, &module_translation_state, environ)?;
+            Payload::CodeSectionStart { .. } => {}
+            Payload::CodeSectionEntry(code) => {
+                let mut code = code.get_binary_reader();
+                let size = code.bytes_remaining();
+                let offset = code.original_position();
+                environ.define_function_body(
+                    &module_translation_state,
+                    code.read_bytes(size)?,
+                    offset,
+                )?;
             }

-            SectionContent::Data(data) => {
+            Payload::DataSection(data) => {
                 parse_data_section(data, environ)?;
             }

-            SectionContent::DataCount(count) => {
+            Payload::DataCountSection { count, .. } => {
                 environ.reserve_passive_data(count)?;
             }

-            SectionContent::Module(_)
-            | SectionContent::ModuleCode(_)
-            | SectionContent::Instance(_)
-            | SectionContent::Alias(_) => unimplemented!("module linking not implemented yet"),
+            Payload::ModuleSection(_)
+            | Payload::InstanceSection(_)
+            | Payload::AliasSection(_)
+            | Payload::ModuleCodeSectionStart { .. }
+            | Payload::ModuleCodeSectionEntry { .. } => {
+                unimplemented!("module linking not implemented yet")
+            }

-            SectionContent::Custom {
-                name,
-                binary,
-                content,
-            } => match content {
-                Some(CustomSectionContent::Name(names)) => {
-                    parse_name_section(names, environ)?;
-                }
-                _ => {
-                    let mut reader = binary.clone();
-                    let len = reader.bytes_remaining();
-                    let payload = reader.read_bytes(len)?;
-                    environ.custom_section(name, payload)?;
-                }
-            },
+            Payload::CustomSection {
+                name: "name",
+                data,
+                data_offset,
+            } => parse_name_section(NameSectionReader::new(data, data_offset)?, environ)?,
+
+            Payload::CustomSection { name, data, .. } => environ.custom_section(name, data)?,
+
+            Payload::UnknownSection { .. } => unreachable!(),
         }
     }
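The old ModuleReader pull loop becomes wasmparser 0.59's streaming Parser, which yields one Payload per section (and one per function body). A self-contained sketch of that iteration style, assuming the 0.59 API exactly as used in the diff above:

use wasmparser::{BinaryReaderError, Parser, Payload};

// Count function bodies by streaming payloads, mirroring the match above.
fn count_function_bodies(data: &[u8]) -> Result<usize, BinaryReaderError> {
    let mut n = 0;
    for payload in Parser::new(0).parse_all(data) {
        if let Payload::CodeSectionEntry(_) = payload? {
            n += 1;
        }
    }
    Ok(n)
}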
@@ -15,6 +15,7 @@ use crate::translation_utils::{
 };
 use crate::{wasm_unsupported, HashMap};
 use core::convert::TryFrom;
+use core::convert::TryInto;
 use cranelift_codegen::ir::immediates::V128Imm;
 use cranelift_codegen::ir::{self, AbiParam, Signature};
 use cranelift_entity::packed_option::ReservedValue;

@@ -22,11 +23,11 @@ use cranelift_entity::EntityRef;
 use std::boxed::Box;
 use std::vec::Vec;
 use wasmparser::{
-    self, CodeSectionReader, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems,
-    ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind,
-    FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType,
-    ImportSectionReader, MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader,
-    Operator, TableSectionReader, Type, TypeDef, TypeSectionReader,
+    self, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems, ElementKind,
+    ElementSectionReader, Export, ExportSectionReader, ExternalKind, FunctionSectionReader,
+    GlobalSectionReader, GlobalType, ImportSectionEntryType, ImportSectionReader,
+    MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader, Operator,
+    TableSectionReader, Type, TypeDef, TypeSectionReader,
 };

 /// Parses the Type section of the wasm module.

@@ -53,7 +54,7 @@ pub fn parse_type_section(
             .expect("only numeric types are supported in function signatures");
         AbiParam::new(cret_arg)
     }));
-    environ.declare_signature(&wasm_func_ty, sig)?;
+    environ.declare_signature(wasm_func_ty.clone().try_into()?, sig)?;
     module_translation_state
         .wasm_types
         .push((wasm_func_ty.params, wasm_func_ty.returns));

@@ -104,7 +105,7 @@ pub fn parse_import_section<'data>(
             ImportSectionEntryType::Global(ref ty) => {
                 environ.declare_global_import(
                     Global {
-                        wasm_ty: ty.content_type,
+                        wasm_ty: ty.content_type.try_into()?,
                         ty: type_to_type(ty.content_type, environ).unwrap(),
                         mutability: ty.mutable,
                         initializer: GlobalInit::Import,

@@ -116,7 +117,7 @@ pub fn parse_import_section<'data>(
             ImportSectionEntryType::Table(ref tab) => {
                 environ.declare_table_import(
                     Table {
-                        wasm_ty: tab.element_type,
+                        wasm_ty: tab.element_type.try_into()?,
                         ty: match tabletype_to_type(tab.element_type, environ)? {
                             Some(t) => TableElementType::Val(t),
                             None => TableElementType::Func,

@@ -166,7 +167,7 @@ pub fn parse_table_section(
     for entry in tables {
         let table = entry?;
         environ.declare_table(Table {
-            wasm_ty: table.element_type,
+            wasm_ty: table.element_type.try_into()?,
            ty: match tabletype_to_type(table.element_type, environ)? {
                Some(t) => TableElementType::Val(t),
                None => TableElementType::Func,

@@ -237,7 +238,7 @@ pub fn parse_global_section(
            }
        };
        let global = Global {
-            wasm_ty: content_type,
+            wasm_ty: content_type.try_into()?,
            ty: type_to_type(content_type, environ).unwrap(),
            mutability: mutable,
            initializer,

@@ -357,21 +358,6 @@ pub fn parse_element_section<'data>(
     Ok(())
 }

-/// Parses the Code section of the wasm module.
-pub fn parse_code_section<'data>(
-    code: CodeSectionReader<'data>,
-    module_translation_state: &ModuleTranslationState,
-    environ: &mut dyn ModuleEnvironment<'data>,
-) -> WasmResult<()> {
-    for body in code {
-        let mut reader = body?.get_binary_reader();
-        let size = reader.bytes_remaining();
-        let offset = reader.original_position();
-        environ.define_function_body(module_translation_state, reader.read_bytes(size)?, offset)?;
-    }
-    Ok(())
-}
-
 /// Parses the Data section of the wasm module.
 pub fn parse_data_section<'data>(
     data: DataSectionReader<'data>,
@@ -30,6 +30,7 @@ fn cranelift_to_wasmparser_type(ty: Type) -> WasmResult<wasmparser::Type> {
         types::I64 => wasmparser::Type::I64,
         types::F32 => wasmparser::Type::F32,
         types::F64 => wasmparser::Type::F64,
+        types::R32 | types::R64 => wasmparser::Type::ExternRef,
         _ => {
             return Err(WasmError::Unsupported(format!(
                 "Cannot convert Cranelift type to Wasm signature: {:?}",
@@ -2,6 +2,7 @@
 use crate::environ::{TargetEnvironment, WasmResult, WasmType};
 use crate::state::ModuleTranslationState;
 use crate::wasm_unsupported;
+use core::convert::TryInto;
 use core::u32;
 use cranelift_codegen::entity::entity_impl;
 use cranelift_codegen::ir;

@@ -39,31 +40,37 @@ entity_impl!(DefinedGlobalIndex);

 /// Index type of a table (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct TableIndex(u32);
 entity_impl!(TableIndex);

 /// Index type of a global variable (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct GlobalIndex(u32);
 entity_impl!(GlobalIndex);

 /// Index type of a linear memory (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct MemoryIndex(u32);
 entity_impl!(MemoryIndex);

 /// Index type of a signature (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct SignatureIndex(u32);
 entity_impl!(SignatureIndex);

 /// Index type of a passive data segment inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct DataIndex(u32);
 entity_impl!(DataIndex);

 /// Index type of a passive element segment inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct ElemIndex(u32);
 entity_impl!(ElemIndex);

@@ -75,6 +82,7 @@ entity_impl!(ElemIndex);
 /// Wasm `i64` and a `funcref` might be represented with a Cranelift `i64` on
 /// 64-bit architectures, and when GC is not required for func refs.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Global {
     /// The Wasm type of the value stored in the global.
     pub wasm_ty: crate::WasmType,

@@ -88,6 +96,7 @@ pub struct Global {

 /// Globals are initialized via the `const` operators or by referring to another import.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum GlobalInit {
     /// An `i32.const`.
     I32Const(i32),

@@ -111,6 +120,7 @@ pub enum GlobalInit {

 /// WebAssembly table.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Table {
     /// The table elements' Wasm type.
     pub wasm_ty: WasmType,

@@ -124,6 +134,7 @@ pub struct Table {

 /// WebAssembly table element. Can be a function or a scalar type.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum TableElementType {
     /// A scalar type.
     Val(ir::Type),

@@ -133,6 +144,7 @@ pub enum TableElementType {

 /// WebAssembly linear memory.
 #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Memory {
     /// The minimum number of pages in the memory.
     pub minimum: u32,

@@ -153,7 +165,9 @@ pub fn type_to_type<PE: TargetEnvironment + ?Sized>(
         wasmparser::Type::F32 => Ok(ir::types::F32),
         wasmparser::Type::F64 => Ok(ir::types::F64),
         wasmparser::Type::V128 => Ok(ir::types::I8X16),
-        wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => Ok(environ.reference_type(ty)),
+        wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => {
+            Ok(environ.reference_type(ty.try_into()?))
+        }
         ty => Err(wasm_unsupported!("type_to_type: wasm type {:?}", ty)),
     }
 }

@@ -170,7 +184,7 @@ pub fn tabletype_to_type<PE: TargetEnvironment + ?Sized>(
         wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
         wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
         wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
-        wasmparser::Type::ExternRef => Ok(Some(environ.reference_type(ty))),
+        wasmparser::Type::ExternRef => Ok(Some(environ.reference_type(ty.try_into()?))),
         wasmparser::Type::FuncRef => Ok(None),
         ty => Err(wasm_unsupported!(
             "tabletype_to_type: table wasm type {:?}",

@@ -226,7 +240,7 @@ pub fn block_with_params<PE: TargetEnvironment + ?Sized>(
             builder.append_block_param(block, ir::types::F64);
         }
         wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => {
-            builder.append_block_param(block, environ.reference_type(*ty));
+            builder.append_block_param(block, environ.reference_type((*ty).try_into()?));
         }
         wasmparser::Type::V128 => {
             builder.append_block_param(block, ir::types::I8X16);
@@ -1 +1 @@
-{"files":{"Cargo.toml":"9fca7a43287a7b615baacc0a1a6ffcb5778793feeeeb37a3e2329d8494bd0744","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","src/analysis_control_flow.rs":"82e71cd345f1d0eb93db23e28048f4b1b3f0dfaf729220df561477be2667a26f","src/analysis_data_flow.rs":"1edb01adc754330a56b7d2c9b2fd28243f9d42a77b828dfbf069a16b6ae28b0d","src/analysis_main.rs":"fda064ae61d15b4f336ab9f4b7fb55fc51ed284f311024ec7b7dec002933a83b","src/avl_tree.rs":"5124db9c746d9c60de4e2cea91f99d68838e7f80585b69cdfea9c53b8d7c6275","src/bt_coalescing_analysis.rs":"7f1c4ac1e844bdf255b63764373a1e71b942fbe21aea65f341d3c0175a2b6805","src/bt_commitment_map.rs":"4209822d3bfa15295ec345616c5da8256603fe7460f4cffa8176f236cc1ddebe","src/bt_main.rs":"f9b745f07dfa4cc892e6ebd06bd6f5e4ffce1aac3ab0585283185bb2bf2b4e58","src/bt_spillslot_allocator.rs":"3939ebe38ddb324f5bcac0da708e0f7d6d91a5735f3342a86f93c6ab551b2731","src/bt_vlr_priority_queue.rs":"082d9ede8c1e7ec3129356450b42056e090f1dae77688589f71be7055eff5333","src/checker.rs":"b767b5e2e014eeddc248cbd76e55f3d45db92a60a280d09e0923bb129f720c1c","src/data_structures.rs":"eca398adfe3e8e7c3c1482c856bbbdaef3fc1f042008a0031171f8324b2e2320","src/inst_stream.rs":"1ade5e2685a29c00b9d8a82c0e295666c68b6b16d28c23b3f43239cc2997f0bf","src/lib.rs":"683a8c53098bc246049a2d55454c4ec86639e692a3950b298e116a80e2493603","src/linear_scan/analysis.rs":"babd475dc0b703e49babc27913676b4ad3f277397f3d2f751cd4fb3743c00ec8","src/linear_scan/assign_registers.rs":"c4e75bc01f447ee8abeb7420172550d114c83813f8b4e81fce1d10c423955088","src/linear_scan/mod.rs":"b238d4c7ef2e71f79b49cdb9c97df784e40e7e659347e3895d4e32244891c54d","src/linear_scan/resolve_moves.rs":"502ee5f515da69c6368a0db50165e9351a2064f226c202f0ac3e1008349f1592","src/reg_maps.rs":"87ede67e8e550fd8e3aa4d880989b2bdf6f21b5668c5fb657039627aa8c1c3d3","src/snapshot.rs":"c434ad0477ef66c2bc6a8ae1ed49df544c5c9185aeb6149fbc682b94aef8aedd","src/sparse_set.rs":"4f0dd9552c1eb190441ed4f1a84aa6861ef44ab825b2df551bd721d69d36c53e","src/union_find.rs":"78f5863cd61ad8c6a21b1b1ccc0996d9c19b469ff8ebd101a1e9f0a3e10f1b7c"},"package":"7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95"}
+{"files":{"Cargo.toml":"c0be0c0ada75a2b5625764ddea1e8abf7feb3048c3eddba825b86d21401c8af2","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","src/analysis_control_flow.rs":"82e71cd345f1d0eb93db23e28048f4b1b3f0dfaf729220df561477be2667a26f","src/analysis_data_flow.rs":"e6caeabd27281dffd5c3e5ed7825c5fa36e881dd3b63e7a0a37b429e207fc1e5","src/analysis_main.rs":"49acbb4ca0ee234d40c70f190341d39894aff1c60449e4258cdcf8c31023168f","src/analysis_reftypes.rs":"efe925c5ebd76c97c97675d998d02151e9e7fcf6473eaa2e41f8dabd369c2c5e","src/avl_tree.rs":"2e48fe5700273f9c3838a69856b341cc993169987ace0580d4de295953449b9b","src/bt_coalescing_analysis.rs":"aed531df612f6f2297ef11ada850091006c72a7a15ea44be20e6e8132cff61a0","src/bt_commitment_map.rs":"2dabf2896b7b2c96d9cf9bf26cdd397765b468772bf44fbb0337eff85433d0f7","src/bt_main.rs":"ee6805a830fbf9c9c4ccce0d9236c8fdf4aab2a1cbcab623187442858b7e7194","src/bt_spillslot_allocator.rs":"3534171c6e156c3d5a948036430a80a2ca7ba728a3e4b33e598479740cffe0e3","src/bt_vlr_priority_queue.rs":"082d9ede8c1e7ec3129356450b42056e090f1dae77688589f71be7055eff5333","src/checker.rs":"65d3e2e5d3e4e4dc05844bbf4e9515bdc21d8d324f2923e4ec6ae2ba7602dee3","src/data_structures.rs":"ffa1d22e85a325d8683dcd11b5b6c6dbd4cd3388df542488a684b154b639d234","src/inst_stream.rs":"9b453924b228bd5d137877769ad6ac892eb22e642d52822f2aeeeb401ac9e386","src/lib.rs":"aa07a5e33bb2b5d6599bca5a3f10a964bb3b62d0a8d52db46b6b4f3ae75f148a","src/linear_scan/analysis.rs":"babd475dc0b703e49babc27913676b4ad3f277397f3d2f751cd4fb3743c00ec8","src/linear_scan/assign_registers.rs":"c4e75bc01f447ee8abeb7420172550d114c83813f8b4e81fce1d10c423955088","src/linear_scan/mod.rs":"dcccdff0b534865776aa807abb268b2b3e008c2b2ac5dacb44eaf1f47e00c472","src/linear_scan/resolve_moves.rs":"2c51e4d6a096454db79090b078780ee9938eae4dd1fe0d32103bdc4e56e4e3c8","src/reg_maps.rs":"87ede67e8e550fd8e3aa4d880989b2bdf6f21b5668c5fb657039627aa8c1c3d3","src/snapshot.rs":"62ff934004a93697d48049e0dae1b99717c56ca35154d3a12d6ba22e47fe0d16","src/sparse_set.rs":"4f0dd9552c1eb190441ed4f1a84aa6861ef44ab825b2df551bd721d69d36c53e","src/union_find.rs":"78f5863cd61ad8c6a21b1b1ccc0996d9c19b469ff8ebd101a1e9f0a3e10f1b7c"},"package":"3598bed0895fe0f72a9e0b00ef9e3a3c8af978a8401b2f2046dec5927de6364a"}
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "regalloc"
-version = "0.0.26"
+version = "0.0.28"
 authors = ["The Regalloc.rs Developers"]
 description = "Modular register allocation algorithms"
 license = "Apache-2.0 WITH LLVM-exception"
@@ -7,10 +7,11 @@ use std::fmt;

 use crate::analysis_control_flow::CFGInfo;
 use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, Point, Queue, RangeFrag, RangeFragIx, RangeFragKind,
-    RangeFragMetrics, RealRange, RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegSets,
-    RegUsageCollector, RegVecBounds, RegVecs, RegVecsAndBounds, SortedRangeFragIxs,
-    SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
+    BlockIx, InstIx, InstPoint, MoveInfo, MoveInfoElem, Point, Queue, RangeFrag, RangeFragIx,
+    RangeFragKind, RangeFragMetrics, RangeId, RealRange, RealRangeIx, RealReg, RealRegUniverse,
+    Reg, RegClass, RegSets, RegToRangesMaps, RegUsageCollector, RegVecBounds, RegVecs,
+    RegVecsAndBounds, SortedRangeFragIxs, SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange,
+    VirtualRangeIx, VirtualReg,
 };
 use crate::sparse_set::SparseSet;
 use crate::union_find::{ToFromU32, UnionFind};

@@ -1154,9 +1155,7 @@ pub fn get_range_frags<F: Function>(
     assert!(rvb.is_sanitized());

     // In order that we can work with unified-reg-indices (see comments above), we need to know
-    // (1) how many virtual regs there are and (2) the `RegClass` for each. That info is
-    // collected in a single pass here. In principle regalloc.rs's user could tell us (1), but
-    // as yet the interface does not make that possible.
+    // the `RegClass` for each virtual register. That info is collected here.
     let mut vreg_classes = vec![RegClass::INVALID; func.get_num_vregs()];
     for r in rvb
         .vecs

@@ -1458,6 +1457,7 @@ fn create_and_add_range(
             vreg: reg.to_virtual_reg(),
             rreg: None,
             sorted_frags,
+            is_ref: false, // analysis_reftypes.rs may later change this
             size,
             total_cost,
             spill_cost,

@@ -1466,6 +1466,7 @@ fn create_and_add_range(
         result_real.push(RealRange {
             rreg: reg.to_real_reg(),
             sorted_frags: sorted_frag_ixs,
+            is_ref: false, // analysis_reftypes.rs may later change this
         });
     }
 }

@@ -1806,3 +1807,154 @@ pub fn merge_range_frags(

     (result_real, result_virtual)
 }
+
+//=============================================================================
+// Auxiliary activities that mostly fall under the category "dataflow analysis", but are not
+// part of the main dataflow analysis pipeline.
+
+// Dataflow and liveness together create vectors of VirtualRanges and RealRanges. These define
+// (amongst other things) mappings from VirtualRanges to VirtualRegs and from RealRanges to
+// RealRegs. However, we often need the inverse mappings: from VirtualRegs to (sets of)
+// VirtualRanges and from RealRegs to (sets of) RealRanges. This function computes those
+// inverse mappings. They are used by BT's coalescing analysis, and for the dataflow analysis
+// that supports reftype handling.
+#[inline(never)]
+pub fn compute_reg_to_ranges_maps<F: Function>(
+    func: &F,
+    univ: &RealRegUniverse,
+    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+) -> RegToRangesMaps {
+    // We have in hand the virtual live ranges. Each of these carries its
+    // associated vreg. So in effect we have a VLR -> VReg mapping. We now
+    // invert that, so as to generate a mapping from VRegs to their containing
+    // VLRs.
+    //
+    // Note that multiple VLRs may map to the same VReg. So the inverse mapping
+    // will actually be from VRegs to a set of VLRs. In most cases, we expect
+    // the virtual-registerised-code given to this allocator to be derived from
+    // SSA, in which case each VReg will have only one VLR. So in this case,
+    // the cost of first creating the mapping, and then looking up all the VRegs
+    // in moves in it, will have cost linear in the size of the input function.
+    //
+    // NB re the SmallVec. That has set semantics (no dups).
+    let mut vreg_to_vlrs_map = vec![SmallVec::<[VirtualRangeIx; 3]>::new(); func.get_num_vregs()];
+    for (vlr, n) in vlr_env.iter().zip(0..) {
+        let vlrix = VirtualRangeIx::new(n);
+        let vreg: VirtualReg = vlr.vreg;
+        // Now we know that there's a VLR `vlr` that is for VReg `vreg`. Update the inverse
+        // mapping accordingly. We know we are stepping sequentially through the VLR (index)
+        // space, so we'll never see the same VLRIx twice. Hence there's no need to check for
+        // dups when adding a VLR index to an existing binding for a VReg.
+        //
+        // If this array-indexing fails, it means the client's `.get_num_vregs()` function
+        // claims there are fewer virtual regs than we actually observe in the code it gave us.
+        // So it's a bug in the client.
+        vreg_to_vlrs_map[vreg.get_index()].push(vlrix);
+    }
+
+    // Same for the real live ranges.
+    let mut rreg_to_rlrs_map = vec![Vec::<RealRangeIx>::new(); univ.allocable];
+    for (rlr, n) in rlr_env.iter().zip(0..) {
+        let rlrix = RealRangeIx::new(n);
+        let rreg: RealReg = rlr.rreg;
+        // If this array-indexing fails, it means something has gone wrong with sanitisation of
+        // real registers -- that should ensure that we never see a real register with an index
+        // greater than `univ.allocable`. So it's a bug in the allocator's analysis phases.
+        rreg_to_rlrs_map[rreg.get_index()].push(rlrix);
+    }
+
+    RegToRangesMaps {
+        rreg_to_rlrs_map,
+        vreg_to_vlrs_map,
+    }
+}
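Stripped of the crate's typed index vectors, the inversion above has this generic shape (hypothetical helper, with plain usize indices standing in for VirtualRangeIx/VirtualReg and friends):

// range_to_reg[i] = register index owning range i; returns reg -> [range ixs].
fn invert_ranges(range_to_reg: &[usize], num_regs: usize) -> Vec<Vec<usize>> {
    let mut reg_to_ranges = vec![Vec::new(); num_regs];
    for (range_ix, &reg_ix) in range_to_reg.iter().enumerate() {
        // Ranges are visited in index order, so each inner Vec stays sorted
        // and duplicate-free, matching the set semantics noted above.
        reg_to_ranges[reg_ix].push(range_ix);
    }
    reg_to_ranges
}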
+
+// Collect info about registers (and optionally Virtual/RealRanges) that are
+// connected by moves:
+#[inline(never)]
+pub fn collect_move_info<F: Function>(
+    func: &F,
+    reg_vecs_and_bounds: &RegVecsAndBounds,
+    est_freqs: &TypedIxVec<BlockIx, u32>,
+    reg_to_ranges_maps: &RegToRangesMaps,
+    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+    fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
+    want_ranges: bool,
+) -> MoveInfo {
+    // Helper: find the RealRange or VirtualRange for a register at an InstPoint.
+    let find_range_for_reg = |pt: InstPoint, reg: Reg| {
+        if !want_ranges {
+            return RangeId::invalid_value();
+        }
+        if reg.is_real() {
+            for &rlrix in &reg_to_ranges_maps.rreg_to_rlrs_map[reg.get_index() as usize] {
+                if rlr_env[rlrix].sorted_frags.contains_pt(fenv, pt) {
+                    return RangeId::new_real(rlrix);
+                }
+            }
+        } else {
+            for &vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[reg.get_index() as usize] {
+                if vlr_env[vlrix].sorted_frags.contains_pt(pt) {
+                    return RangeId::new_virtual(vlrix);
+                }
+            }
+        }
+        RangeId::invalid_value()
+    };
+
+    let mut moves = Vec::<MoveInfoElem>::new();
+    for b in func.blocks() {
+        let block_eef = est_freqs[b];
+        for iix in func.block_insns(b) {
+            let insn = &func.get_insn(iix);
+            let im = func.is_move(insn);
+            match im {
+                None => {}
+                Some((wreg, reg)) => {
+                    let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
+                    // It might seem strange to assert that `defs_len` and/or
+                    // `uses_len` is <= 1 rather than == 1. The reason is
+                    // that either or even both registers might be ones which
+                    // are not available to the allocator. Hence they will
+                    // have been removed by the sanitisation machinery before
+                    // we get to this point. If either is missing, we
+                    // unfortunately can't coalesce the move away, and just
+                    // have to live with it.
+                    //
+                    // If any of the following five assertions fail, the
+                    // client's `is_move` is probably lying to us.
+                    assert!(iix_bounds.uses_len <= 1);
+                    assert!(iix_bounds.defs_len <= 1);
+                    assert!(iix_bounds.mods_len == 0);
+                    if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
+                        let reg_vecs = &reg_vecs_and_bounds.vecs;
+                        assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
+                        assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
+                        let dst = wreg.to_reg();
+                        let src = reg;
+                        let est_freq = block_eef;
+
+                        // Find the ranges for source and dest, if requested.
+                        let (src_range, dst_range) = (
+                            find_range_for_reg(InstPoint::new(iix, Point::Use), src),
+                            find_range_for_reg(InstPoint::new(iix, Point::Def), dst),
+                        );
+
+                        moves.push(MoveInfoElem {
+                            dst,
+                            dst_range,
+                            src,
+                            src_range,
+                            iix,
+                            est_freq,
+                        });
+                    }
+                }
+            }
+        }
+    }
+
+    MoveInfo { moves }
+}
@@ -4,14 +4,17 @@ use log::{debug, info};

 use crate::analysis_control_flow::{CFGInfo, InstIxToBlockIxMap};
 use crate::analysis_data_flow::{
-    calc_def_and_use, calc_livein_and_liveout, get_range_frags, get_sanitized_reg_uses_for_func,
-    merge_range_frags,
+    calc_def_and_use, calc_livein_and_liveout, collect_move_info, compute_reg_to_ranges_maps,
+    get_range_frags, get_sanitized_reg_uses_for_func, merge_range_frags,
 };
+use crate::analysis_reftypes::do_reftypes_analysis;
 use crate::data_structures::{
-    BlockIx, RangeFrag, RangeFragIx, RangeFragMetrics, RealRange, RealRangeIx, RealReg,
-    RealRegUniverse, RegVecsAndBounds, TypedIxVec, VirtualRange, VirtualRangeIx,
+    BlockIx, MoveInfo, RangeFrag, RangeFragIx, RangeFragMetrics, RealRange, RealRangeIx, RealReg,
+    RealRegUniverse, RegClass, RegToRangesMaps, RegVecsAndBounds, TypedIxVec, VirtualRange,
+    VirtualRangeIx, VirtualReg,
 };
 use crate::sparse_set::SparseSet;
 use crate::AlgorithmWithDefaults;
 use crate::Function;

 //=============================================================================

@@ -45,6 +48,10 @@ pub enum AnalysisError {
     /// Implementation limits exceeded. The incoming function is too big. It
     /// may contain at most 1 million basic blocks and 16 million instructions.
     ImplementationLimitsExceeded,
+
+    /// Currently LSRA can't generate stackmaps, but the client has requested LSRA *and*
+    /// stackmaps.
+    LSRACantDoStackmaps,
 }

 impl ToString for AnalysisError {

@@ -65,6 +72,9 @@ impl ToString for AnalysisError {
             AnalysisError::ImplementationLimitsExceeded => {
                 "implementation limits exceeded (more than 1 million blocks or 16 million insns)".to_string()
             }
+            AnalysisError::LSRACantDoStackmaps => {
+                "LSRA *and* stackmap creation requested; but this combination is not yet supported".to_string()
+            }
         }
     }
 }

@@ -87,12 +97,23 @@ pub struct AnalysisInfo {
     pub(crate) estimated_frequencies: TypedIxVec<BlockIx, u32>,
     /// Maps InstIxs to BlockIxs
     pub(crate) inst_to_block_map: InstIxToBlockIxMap,
+    /// Maps from RealRegs to sets of RealRanges and VirtualRegs to sets of VirtualRanges
+    /// (all operating on indices, not the actual objects). This is only generated in
+    /// situations where we need it, hence the `Option`.
+    pub(crate) reg_to_ranges_maps: Option<RegToRangesMaps>,
+    /// Information about registers connected by moves. This is only generated in situations
+    /// where we need it, hence the `Option`.
+    pub(crate) move_info: Option<MoveInfo>,
 }

 #[inline(never)]
 pub fn run_analysis<F: Function>(
     func: &F,
     reg_universe: &RealRegUniverse,
+    algorithm: AlgorithmWithDefaults,
+    client_wants_stackmaps: bool,
+    reftype_class: RegClass,
+    reftyped_vregs: &Vec<VirtualReg>, // as supplied by the client
 ) -> Result<AnalysisInfo, AnalysisError> {
     info!("run_analysis: begin");
     info!(

@@ -100,6 +121,12 @@ pub fn run_analysis<F: Function>(
         func.blocks().len(),
         func.insns().len()
     );
+
+    // LSRA can't do reftypes yet. That should have been checked at the top level already.
+    if client_wants_stackmaps {
+        assert!(algorithm != AlgorithmWithDefaults::LinearScan);
+    }

     info!("  run_analysis: begin control flow analysis");

     // First do control flow analysis. This is (relatively) simple. Note that

@@ -196,7 +223,9 @@ pub fn run_analysis<F: Function>(
         &liveout_sets_per_block,
     );

-    let (rlr_env, vlr_env) = merge_range_frags(
+    // These have to be mut because they may get changed below by the call to
+    // `do_reftypes_analysis`.
+    let (mut rlr_env, mut vlr_env) = merge_range_frags(
         &frag_ixs_per_reg,
         &frag_env,
         &frag_metrics_env,

@@ -226,7 +255,53 @@ pub fn run_analysis<F: Function>(
         n += 1;
     }

+    // Now a bit of auxiliary info collection, which isn't really either control- or data-flow
+    // analysis.
+
+    // For BT and/or reftypes, we'll also need the reg-to-ranges maps.
+    let reg_to_ranges_maps =
+        if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+            Some(compute_reg_to_ranges_maps(
+                func,
+                &reg_universe,
+                &rlr_env,
+                &vlr_env,
+            ))
+        } else {
+            None
+        };
+
+    // For BT and/or reftypes, we'll also need information about moves.
+    let move_info = if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+        Some(collect_move_info(
+            func,
+            &reg_vecs_and_bounds,
+            &estimated_frequencies,
+            reg_to_ranges_maps.as_ref().unwrap(),
+            &rlr_env,
+            &vlr_env,
+            &frag_env,
+            /* want_ranges = */ client_wants_stackmaps,
+        ))
+    } else {
+        None
+    };
+
     info!("  run_analysis: end liveness analysis");

+    if client_wants_stackmaps {
+        info!("  run_analysis: begin reftypes analysis");
+        do_reftypes_analysis(
+            &mut rlr_env,
+            &mut vlr_env,
+            reg_to_ranges_maps.as_ref().unwrap(), /* safe because of logic just above */
+            &move_info.as_ref().unwrap(),         /* ditto */
+            reftype_class,
+            reftyped_vregs,
+        );
+        info!("  run_analysis: end reftypes analysis");
+    }
+
     info!("run_analysis: end");

     Ok(AnalysisInfo {

@@ -237,5 +312,7 @@ pub fn run_analysis<F: Function>(
         range_metrics: frag_metrics_env,
         estimated_frequencies,
         inst_to_block_map,
+        reg_to_ranges_maps,
+        move_info,
     })
 }
@@ -0,0 +1,111 @@
+//! Performs a simple taint analysis, to find all live ranges that are reftyped.
+
+use crate::data_structures::{
+    MoveInfo, MoveInfoElem, RangeId, RealRange, RealRangeIx, RegClass, RegToRangesMaps, TypedIxVec,
+    VirtualRange, VirtualRangeIx, VirtualReg,
+};
+use crate::sparse_set::SparseSet;
+
+use log::debug;
+
+pub fn do_reftypes_analysis(
+    // From dataflow/liveness analysis. Modified by setting their `is_ref` bit.
+    rlr_env: &mut TypedIxVec<RealRangeIx, RealRange>,
+    vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
+    // From dataflow analysis
+    reg_to_ranges_maps: &RegToRangesMaps,
+    move_info: &MoveInfo,
+    // As supplied by the client
+    reftype_class: RegClass,
+    reftyped_vregs: &Vec<VirtualReg>,
+) {
+    // The game here is: starting with `reftyped_vregs`, find *all* the VirtualRanges and
+    // RealRanges to which that refness can flow, via instructions which the client's `is_move`
+    // function considers to be moves.
+
+    // We have `move_info`, which tells us which regs (both real and virtual) are connected by
+    // moves. However, that's not directly useful -- we need to know which *ranges* are
+    // connected by moves. So first, convert `move_info` into a set of range-pairs.
+
+    let mut range_pairs = Vec::<(RangeId, RangeId)>::new(); // (DST, SRC)
+
+    debug!("do_reftypes_analysis starting");
+
+    for &MoveInfoElem {
+        dst,
+        src,
+        src_range,
+        dst_range,
+        iix,
+        ..
+    } in &move_info.moves
+    {
+        // Don't waste time processing moves which can't possibly be of reftyped values.
+        if dst.get_class() != reftype_class {
+            continue;
+        }
+        debug!(
+            "move from {:?} (range {:?}) to {:?} (range {:?}) at inst {:?}",
+            src, src_range, dst, dst_range, iix
+        );
+        range_pairs.push((dst_range, src_range));
+    }
+
+    // We now have in hand the range-pairs, which must be a superset of the moves that could
+    // possibly carry reftyped values. Now compute the starting set of reftyped virtual
+    // ranges. This can serve as the starting value for the following fixpoint iteration.
+
+    let mut reftyped_ranges = SparseSet::<RangeId>::empty();
+    for vreg in reftyped_vregs {
+        // If this fails, the client has been telling us that some virtual reg is reftyped, yet
+        // it doesn't belong to the class of regs that it claims can carry refs. So the client
+        // is buggy.
+        debug_assert!(vreg.get_class() == reftype_class);
+        for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[vreg.get_index()] {
+            debug!("range {:?} is reffy due to reffy vreg {:?}", vlrix, vreg);
+            reftyped_ranges.insert(RangeId::new_virtual(*vlrix));
+        }
+    }
+
+    // Now, finally, compute the fixpoint resulting from repeatedly mapping `reftyped_ranges`
+    // through `range_pairs`. XXXX this looks dangerously expensive .. reimplement.
+    //
+    // Later .. this is overkill. All that is needed is a DFS of the directed graph in which
+    // the nodes are the union of the RealRange(Ixs) and the VirtualRange(Ixs), and whose edges
+    // are exactly what we computed into `range_pairs`. This graph then needs to be searched
+    // from each root in `reftyped_ranges`.
+    loop {
+        let card_before = reftyped_ranges.card();
+
+        for (dst_lr_id, src_lr_id) in &range_pairs {
+            if reftyped_ranges.contains(*src_lr_id) {
+                debug!("reftyped range {:?} -> {:?}", src_lr_id, dst_lr_id);
+                reftyped_ranges.insert(*dst_lr_id);
+            }
+        }
+
+        let card_after = reftyped_ranges.card();
+        if card_after == card_before {
+            // Since we're only ever adding items to `reftyped_ranges`, and it has set
+            // semantics, checking that the cardinality is unchanged is an adequate check for
+            // having reached a (the minimal?) fixpoint.
+            break;
+        }
+    }
+
+    // Finally, annotate rlr_env/vlr_env with the results of the analysis. (That was the whole
+    // point!)
+    for lr_id in reftyped_ranges.iter() {
+        if lr_id.is_real() {
+            let rrange = &mut rlr_env[lr_id.to_real()];
+            debug_assert!(!rrange.is_ref);
+            debug!("  -> rrange {:?} is reffy", lr_id.to_real());
+            rrange.is_ref = true;
+        } else {
+            let vrange = &mut vlr_env[lr_id.to_virtual()];
+            debug_assert!(!vrange.is_ref);
+            debug!("  -> vrange {:?} is reffy", lr_id.to_virtual());
+            vrange.is_ref = true;
+        }
+    }
+}
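The comment in the new file already notes that the fixpoint loop is overkill: the propagation is a plain reachability problem over the move graph. A standalone sketch of the suggested DFS/worklist formulation, generic over the node type so it does not depend on regalloc's internal RangeId (hypothetical helper, not part of the crate):

use std::collections::{HashMap, HashSet};
use std::hash::Hash;

// `pairs` holds (dst, src) as in `range_pairs`; refness flows src -> dst.
fn propagate_refness<N: Copy + Eq + Hash>(seeds: &[N], pairs: &[(N, N)]) -> HashSet<N> {
    // Adjacency: src -> all dsts it moves into.
    let mut succs: HashMap<N, Vec<N>> = HashMap::new();
    for &(dst, src) in pairs {
        succs.entry(src).or_default().push(dst);
    }
    let mut reached: HashSet<N> = seeds.iter().copied().collect();
    let mut stack: Vec<N> = seeds.to_vec();
    while let Some(n) = stack.pop() {
        for &d in succs.get(&n).into_iter().flatten() {
            if reached.insert(d) {
                stack.push(d); // each range is visited at most once
            }
        }
    }
    reached
}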
@@ -818,6 +818,45 @@ impl<T: Clone + PartialOrd> AVLTree<T> {
         }
     }

+    // Find `item` in the tree, and replace it with `replacement`. `item` and `replacement`
+    // must compare equal per the comparison function `cmp`. Returns a bool indicating whether
+    // `item` was found (and hence, replaced). There's no comparison fast-path here
+    // (meaning, `cmp` is `&F` and not `Option<&F>`) only because so far there is no use case
+    // for it.
+    pub fn find_and_replace<F>(&mut self, item: T, replacement: T, cmp: &F) -> bool
+    where
+        F: Fn(T, T) -> Option<Ordering>,
+    {
+        let mut n = self.root;
+        loop {
+            if n == AVL_NULL {
+                return false;
+            }
+            let cmp_arg_left: T = item.clone();
+            let cmp_arg_right: T = self.pool[n as usize].item.clone();
+            match cmp(cmp_arg_left, cmp_arg_right) {
+                Some(Ordering::Less) => {
+                    n = self.pool[n as usize].left;
+                }
+                Some(Ordering::Greater) => {
+                    n = self.pool[n as usize].right;
+                }
+                Some(Ordering::Equal) => {
+                    // Do what we can to ensure the caller can't mess up the total ordering in
+                    // the tree. This is more restrictive than it needs to be, but loosening
+                    // it requires finding the largest item below `item` and the smallest one
+                    // above it, which is expensive.
+                    assert!(cmp(item, replacement.clone()) == Some(Ordering::Equal));
+                    self.pool[n as usize].item = replacement.clone();
+                    return true;
+                }
+                None => {
+                    panic!("AVLTree::find_and_replace: unordered elements in search!");
+                }
+            }
+        }
+    }
+
     // Determine whether an item is in the tree.
     // sewardj 2020Mar31: this is not used; I assume all users of the trees
     // do their own custom traversals. Remove #[cfg(test)] if any real uses
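What makes find_and_replace useful is that `cmp` may ignore part of T: two values can compare equal while carrying different payloads. A standalone model of that contract, with a plain slice standing in for the crate-internal tree (hypothetical helper):

use std::cmp::Ordering;

// Replace the first element comparing equal to `item`, enforcing the same
// precondition as the tree version: item and replacement must compare equal.
fn find_and_replace_model<T: Clone, F>(xs: &mut [T], item: T, replacement: T, cmp: &F) -> bool
where
    F: Fn(T, T) -> Option<Ordering>,
{
    assert!(cmp(item.clone(), replacement.clone()) == Some(Ordering::Equal));
    for slot in xs.iter_mut() {
        if cmp(item.clone(), slot.clone()) == Some(Ordering::Equal) {
            *slot = replacement; // ordering key unchanged, payload swapped
            return true;
        }
    }
    false
}

// Usage idea: order (key, payload) pairs by key only, then swap payloads:
//   let cmp = |a: (u32, char), b: (u32, char)| a.0.partial_cmp(&b.0);
//   find_and_replace_model(&mut v, (5, 'x'), (5, 'y'), &cmp);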
@@ -30,8 +30,8 @@ use log::{debug, info, log_enabled, Level};
use smallvec::{smallvec, SmallVec};

use crate::data_structures::{
    BlockIx, InstIx, InstPoint, RangeFrag, RangeFragIx, RealRange, RealRangeIx, RealReg,
    RealRegUniverse, Reg, RegVecsAndBounds, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
    InstIx, InstPoint, MoveInfo, MoveInfoElem, RangeFrag, RangeFragIx, RealRange, RealRangeIx,
    RealReg, RealRegUniverse, RegToRangesMaps, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
    VirtualReg,
};
use crate::union_find::{ToFromU32, UnionFind, UnionFindEquivClasses};
@@ -132,197 +132,113 @@ impl ToFromU32 for VirtualRangeIx {
#[inline(never)]
pub fn do_coalescing_analysis<F: Function>(
    func: &F,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    univ: &RealRegUniverse,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    est_freqs: &TypedIxVec<BlockIx, u32>,
    univ: &RealRegUniverse,
    reg_to_ranges_maps: &RegToRangesMaps,
    move_info: &MoveInfo,
) -> (
    TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>>,
    UnionFindEquivClasses<VirtualRangeIx>,
    TypedIxVec<InstIx, bool>,
    Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>>,
) {
    info!("");
    info!("do_coalescing_analysis: begin");
    // We have in hand the virtual live ranges. Each of these carries its
    // associated vreg. So in effect we have a VLR -> VReg mapping. We now
    // invert that, so as to generate a mapping from VRegs to their containing
    // VLRs.
    //
    // Note that multiple VLRs may map to the same VReg. So the inverse mapping
    // will actually be from VRegs to a set of VLRs. In most cases, we expect
    // the virtual-registerised-code given to this allocator to be derived from
    // SSA, in which case each VReg will have only one VLR. So in this case,
    // the cost of first creating the mapping, and then looking up all the VRegs
    // in moves in it, will have cost linear in the size of the input function.
    //
    // It would be convenient here to know how many VRegs there are ahead of
    // time, but until then we'll discover it dynamically.
    // NB re the SmallVec. That has set semantics (no dups)
    // FIXME use SmallVec for the VirtualRangeIxs. Or even a sparse set.
    let mut vreg_to_vlrs_map = Vec::</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>>::new();

    for (vlr, n) in vlr_env.iter().zip(0..) {
        let vlrix = VirtualRangeIx::new(n);
        let vreg: VirtualReg = vlr.vreg;
        // Now we know that there's a VLR `vlr` that is for VReg `vreg`. Update
        // the inverse mapping accordingly. That may involve resizing it, since
        // we have no idea of the order in which we will first encounter VRegs.
        // By contrast, we know we are stepping sequentially through the VLR
        // (index) space, and we'll never see the same VLRIx twice. So there's no
        // need to check for dups when adding a VLR index to an existing binding
        // for a VReg.
        let vreg_ix = vreg.get_index();
    // There follow four closures, which are used to find out whether a real or virtual reg has
    // a last use or first def at some instruction. This is the central activity of the
    // coalescing analysis -- finding move instructions that are the last def for the src reg
    // and the first def for the dst reg.

        while vreg_to_vlrs_map.len() <= vreg_ix {
            vreg_to_vlrs_map.push(smallvec![]); // This is very un-clever
        }

        vreg_to_vlrs_map[vreg_ix].push(vlrix);
    }
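
The loop above is the usual build-an-inverse-index pattern: grow the target table on demand, then push. A self-contained sketch of the same idea, with plain u32 indices standing in for VirtualReg and VirtualRangeIx (hypothetical simplification):

// Invert an owner-per-range table into an owner -> ranges table.
fn invert(owner_of_range: &[u32]) -> Vec<Vec<u32>> {
    let mut owner_to_ranges: Vec<Vec<u32>> = Vec::new();
    for (range_ix, &owner) in owner_of_range.iter().enumerate() {
        // Owners may appear in any order, so grow the table on demand.
        while owner_to_ranges.len() <= owner as usize {
            owner_to_ranges.push(Vec::new());
        }
        // `range_ix` increases monotonically, so no duplicate check is needed.
        owner_to_ranges[owner as usize].push(range_ix as u32);
    }
    owner_to_ranges
}

fn main() {
    // Ranges 0 and 2 belong to vreg 1; range 1 belongs to vreg 0.
    let map = invert(&[1, 0, 1]);
    assert_eq!(map[1], vec![0, 2]);
}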

    // Same for the real live ranges
    let mut rreg_to_rlrs_map = Vec::</*rreg index,*/ Vec<RealRangeIx>>::new();

    for (rlr, n) in rlr_env.iter().zip(0..) {
        let rlrix = RealRangeIx::new(n);
        let rreg: RealReg = rlr.rreg;
        let rreg_ix = rreg.get_index();

        while rreg_to_rlrs_map.len() <= rreg_ix {
            rreg_to_rlrs_map.push(vec![]); // This is very un-clever
        }

        rreg_to_rlrs_map[rreg_ix].push(rlrix);
    }

    // And what do we got?
    //for (vlrixs, vreg) in vreg_to_vlrs_map.iter().zip(0..) {
    //    println!("QQQQ vreg v{:?} -> vlrixs {:?}", vreg, vlrixs);
    //}
    //for (rlrixs, rreg) in rreg_to_rlrs_map.iter().zip(0..) {
    //    println!("QQQQ rreg r{:?} -> rlrixs {:?}", rreg, rlrixs);
    //}

    // Range end checks for VRegs. The XX means either "Last use" or "First
    // def", depending on the boolean parameter.
    let doesVRegHaveXXat
        // `xxIsLastUse` is true means "XX is last use"
        // `xxIsLastUse` is false means "XX is first def"
        = |xxIsLastUse: bool, vreg: VirtualReg, iix: InstIx|
         -> Option<VirtualRangeIx> {
            let vreg_no = vreg.get_index();
            let vlrixs = &vreg_to_vlrs_map[vreg_no];
            for vlrix in vlrixs {
                for frag in &vlr_env[*vlrix].sorted_frags.frags {
                    if xxIsLastUse {
                        // We're checking to see if `vreg` has a last use in this block
                        // (well, technically, a fragment end in the block; we don't care if
                        // it is later redefined in the same block) .. anyway ..
                        // We're checking to see if `vreg` has a last use in this block
                        // at `iix`.u
                        if frag.last == InstPoint::new_use(iix) {
                            return Some(*vlrix);
                        }
                    } else {
                        // We're checking to see if `vreg` has a first def in this block
                        // at `iix`.d
                        if frag.first == InstPoint::new_def(iix) {
                            return Some(*vlrix);
                        }
                    }
                }
            }
            None
        };

    // Range end checks for RRegs. XX has same meaning as above.
    let doesRRegHaveXXat
        // `xxIsLastUse` is true means "XX is last use"
        // `xxIsLastUse` is false means "XX is first def"
        = |xxIsLastUse: bool, rreg: RealReg, iix: InstIx|
         -> Option<RealRangeIx> {
            let rreg_no = rreg.get_index();
            let rlrixs = &rreg_to_rlrs_map[rreg_no];
            for rlrix in rlrixs {
                let frags = &rlr_env[*rlrix].sorted_frags;
                for fix in &frags.frag_ixs {
                    let frag = &frag_env[*fix];
                    if xxIsLastUse {
                        // We're checking to see if `rreg` has a last use in this block
                        // at `iix`.u
                        if frag.last == InstPoint::new_use(iix) {
                            return Some(*rlrix);
                        }
                    } else {
                        // We're checking to see if `rreg` has a first def in this block
                        // at `iix`.d
                        if frag.first == InstPoint::new_def(iix) {
                            return Some(*rlrix);
                        }
                    }
                }
            }
            None
        };

    // Make up a vector of registers that are connected by moves:
    //
    // (dstReg, srcReg, transferring insn, estimated execution count of the
    // containing block)
    //
    // This can contain real-to-real moves, which we obviously can't do anything
    // about. We'll remove them in the next pass.
    let mut connectedByMoves = Vec::<(Reg, Reg, InstIx, u32)>::new();
    for b in func.blocks() {
        let block_eef = est_freqs[b];
        for iix in func.block_insns(b) {
            let insn = &func.get_insn(iix);
            let im = func.is_move(insn);
            match im {
                None => {}
                Some((wreg, reg)) => {
                    let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
                    // It might seem strange to assert that `defs_len` and/or
                    // `uses_len` is <= 1 rather than == 1. The reason is
                    // that either or even both registers might be ones which
                    // are not available to the allocator. Hence they will
                    // have been removed by the sanitisation machinery before
                    // we get to this point. If either is missing, we
                    // unfortunately can't coalesce the move away, and just
                    // have to live with it.
                    //
                    // If any of the following five assertions fail, the
                    // client's `is_move` is probably lying to us.
                    assert!(iix_bounds.uses_len <= 1);
                    assert!(iix_bounds.defs_len <= 1);
                    assert!(iix_bounds.mods_len == 0);
                    if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
                        let reg_vecs = &reg_vecs_and_bounds.vecs;
                        assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
                        assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
                        connectedByMoves.push((wreg.to_reg(), reg, iix, block_eef));
                    }
    // Range checks for VRegs -- last use.
    let doesVRegHaveLastUseAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
        let vreg_no = vreg.get_index();
        let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
        for vlrix in vlrixs {
            for frag in &vlr_env[*vlrix].sorted_frags.frags {
                // We're checking to see if `vreg` has a last use in this block
                // (well, technically, a fragment end in the block; we don't care if
                // it is later redefined in the same block) .. anyway ..
                // We're checking to see if `vreg` has a last use in this block
                // at `iix`.u
                if frag.last == InstPoint::new_use(iix) {
                    return Some(*vlrix);
                }
            }
        }
    }
        None
    };

    // XX these sub-vectors could contain duplicates, I suppose, for example if
    // there are two identical copy insns at different points on the "boundary"
    // for some VLR. I don't think it matters though since we're going to rank
    // the hints by strength and then choose at most one.
    // Range checks for VRegs -- first def.
    let doesVRegHaveFirstDefAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
        let vreg_no = vreg.get_index();
        let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
        for vlrix in vlrixs {
            for frag in &vlr_env[*vlrix].sorted_frags.frags {
                // We're checking to see if `vreg` has a first def in this block at `iix`.d
                if frag.first == InstPoint::new_def(iix) {
                    return Some(*vlrix);
                }
            }
        }
        None
    };

    // Range checks for RRegs -- last use.
    let doesRRegHaveLastUseAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
        let rreg_no = rreg.get_index();
        let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
        for rlrix in rlrixs {
            let frags = &rlr_env[*rlrix].sorted_frags;
            for fix in &frags.frag_ixs {
                let frag = &frag_env[*fix];
                // We're checking to see if `rreg` has a last use in this block at `iix`.u
                if frag.last == InstPoint::new_use(iix) {
                    return Some(*rlrix);
                }
            }
        }
        None
    };

    // Range checks for RRegs -- first def.
    let doesRRegHaveFirstDefAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
        let rreg_no = rreg.get_index();
        let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
        for rlrix in rlrixs {
            let frags = &rlr_env[*rlrix].sorted_frags;
            for fix in &frags.frag_ixs {
                let frag = &frag_env[*fix];
                // We're checking to see if `rreg` has a first def in this block at `iix`.d
                if frag.first == InstPoint::new_def(iix) {
                    return Some(*rlrix);
                }
            }
        }
        None
    };
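
The `iix`.u / `iix`.d notation in the comments above refers to sub-instruction points. A minimal sketch of why "fragment ends at the use point" means "last use", assuming the four-way Reload < Use < Def < Spill subdivision of each instruction used by this allocator:

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Pt {
    Reload, // ".r"
    Use,    // ".u"
    Def,    // ".d"
    Spill,  // ".s"
}

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Ip {
    iix: u32,
    pt: Pt,
}

fn main() {
    let use_pt = Ip { iix: 7, pt: Pt::Use }; // "7.u"
    let def_pt = Ip { iix: 7, pt: Pt::Def }; // "7.d"
    // Within one instruction the use point precedes the def point, so a
    // fragment ending at `iix`.u is a last use, and one starting at `iix`.d
    // is a first def.
    assert!(use_pt < def_pt);
}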

    // RETURNED TO CALLER
    // Hints for each VirtualRange. Note that the SmallVecs could contain duplicates, I
    // suppose, for example if there are two identical copy insns at different points on the
    // "boundary" for some VLR. I don't think it matters though since we're going to rank the
    // hints by strength and then choose at most one.
    let mut hints = TypedIxVec::<VirtualRangeIx, SmallVec<[Hint; 8]>>::new();
    hints.resize(vlr_env.len(), smallvec![]);

    // RETURNED TO CALLER
    // A vector that simply records which insns are v-to-v boundary moves, as established by the
    // analysis below. This info is collected here because (1) the caller (BT) needs to have it
    // and (2) this is the first point at which we can efficiently compute it.
    let mut is_vv_boundary_move = TypedIxVec::<InstIx, bool>::new();
    is_vv_boundary_move.resize(func.insns().len() as u32, false);

    // RETURNED TO CALLER (after finalisation)
    // The virtual-to-virtual equivalence classes we're collecting.
    let mut vlrEquivClassesUF = UnionFind::<VirtualRangeIx>::new(vlr_env.len() as usize);

    // Not returned to caller; for use only in this function.
    // A list of `VirtualRange`s for which the `total_cost` (hence also their
    // `spill_cost`) should be adjusted downwards by the supplied `u32`. We
    // can't do this directly in the loop below due to borrowing constraints,
@@ -330,18 +246,25 @@ pub fn do_coalescing_analysis<F: Function>(
    // loop.
    let mut decVLRcosts = Vec::<(VirtualRangeIx, VirtualRangeIx, u32)>::new();

    for (rDst, rSrc, iix, block_eef) in connectedByMoves {
    for MoveInfoElem {
        dst,
        src,
        iix,
        est_freq,
        ..
    } in &move_info.moves
    {
        debug!(
            "QQQQ connectedByMoves {:?} {:?} <- {:?} (block_eef {})",
            iix, rDst, rSrc, block_eef
            "connected by moves: {:?} {:?} <- {:?} (est_freq {})",
            iix, dst, src, est_freq
        );
        match (rDst.is_virtual(), rSrc.is_virtual()) {
        match (dst.is_virtual(), src.is_virtual()) {
            (true, true) => {
                // Check for a V <- V hint.
                let rSrcV = rSrc.to_virtual_reg();
                let rDstV = rDst.to_virtual_reg();
                let mb_vlrixSrc = doesVRegHaveXXat(/*xxIsLastUse=*/ true, rSrcV, iix);
                let mb_vlrixDst = doesVRegHaveXXat(/*xxIsLastUse=*/ false, rDstV, iix);
                let srcV = src.to_virtual_reg();
                let dstV = dst.to_virtual_reg();
                let mb_vlrixSrc = doesVRegHaveLastUseAt(srcV, *iix);
                let mb_vlrixDst = doesVRegHaveFirstDefAt(dstV, *iix);
                if mb_vlrixSrc.is_some() && mb_vlrixDst.is_some() {
                    let vlrixSrc = mb_vlrixSrc.unwrap();
                    let vlrixDst = mb_vlrixDst.unwrap();
@@ -353,39 +276,39 @@ pub fn do_coalescing_analysis<F: Function>(
                    // Add hints for both VLRs, since we don't know which one will
                    // assign first. Indeed, a VLR may be assigned and un-assigned
                    // arbitrarily many times.
                    hints[vlrixSrc].push(Hint::SameAs(vlrixDst, block_eef));
                    hints[vlrixDst].push(Hint::SameAs(vlrixSrc, block_eef));
                    hints[vlrixSrc].push(Hint::SameAs(vlrixDst, *est_freq));
                    hints[vlrixDst].push(Hint::SameAs(vlrixSrc, *est_freq));
                    vlrEquivClassesUF.union(vlrixDst, vlrixSrc);
                    is_vv_boundary_move[iix] = true;
                    is_vv_boundary_move[*iix] = true;
                    // Reduce the total cost, and hence the spill cost, of
                    // both `vlrixSrc` and `vlrixDst`. This is so as to reduce to
                    // zero, the cost of a VLR whose only instructions are its
                    // v-v boundary copies.
                    debug!("QQQQ reduce cost of {:?} and {:?}", vlrixSrc, vlrixDst);
                    decVLRcosts.push((vlrixSrc, vlrixDst, 1 * block_eef));
                    debug!("reduce cost of {:?} and {:?}", vlrixSrc, vlrixDst);
                    decVLRcosts.push((vlrixSrc, vlrixDst, 1 * est_freq));
                }
            }
            }
            (true, false) => {
                // Check for a V <- R hint.
                let rSrcR = rSrc.to_real_reg();
                let rDstV = rDst.to_virtual_reg();
                let mb_rlrSrc = doesRRegHaveXXat(/*xxIsLastUse=*/ true, rSrcR, iix);
                let mb_vlrDst = doesVRegHaveXXat(/*xxIsLastUse=*/ false, rDstV, iix);
                let srcR = src.to_real_reg();
                let dstV = dst.to_virtual_reg();
                let mb_rlrSrc = doesRRegHaveLastUseAt(srcR, *iix);
                let mb_vlrDst = doesVRegHaveFirstDefAt(dstV, *iix);
                if mb_rlrSrc.is_some() && mb_vlrDst.is_some() {
                    let vlrDst = mb_vlrDst.unwrap();
                    hints[vlrDst].push(Hint::Exactly(rSrcR, block_eef));
                    hints[vlrDst].push(Hint::Exactly(srcR, *est_freq));
                }
            }
            (false, true) => {
                // Check for a R <- V hint.
                let rSrcV = rSrc.to_virtual_reg();
                let rDstR = rDst.to_real_reg();
                let mb_vlrSrc = doesVRegHaveXXat(/*xxIsLastUse=*/ true, rSrcV, iix);
                let mb_rlrDst = doesRRegHaveXXat(/*xxIsLastUse=*/ false, rDstR, iix);
                let srcV = src.to_virtual_reg();
                let dstR = dst.to_real_reg();
                let mb_vlrSrc = doesVRegHaveLastUseAt(srcV, *iix);
                let mb_rlrDst = doesRRegHaveFirstDefAt(dstR, *iix);
                if mb_vlrSrc.is_some() && mb_rlrDst.is_some() {
                    let vlrSrc = mb_vlrSrc.unwrap();
                    hints[vlrSrc].push(Hint::Exactly(rDstR, block_eef));
                    hints[vlrSrc].push(Hint::Exactly(dstR, *est_freq));
                }
            }
            (false, false) => {
@@ -468,10 +391,5 @@ pub fn do_coalescing_analysis<F: Function>(
    info!("do_coalescing_analysis: end");
    info!("");

    (
        hints,
        vlrEquivClasses,
        is_vv_boundary_move,
        vreg_to_vlrs_map,
    )
    (hints, vlrEquivClasses, is_vv_boundary_move)
}
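
For reference, the equivalence classes collected via `vlrEquivClassesUF.union` above are plain union-find. A toy version, with usize indices as a hypothetical stand-in for the crate's UnionFind over VirtualRangeIx:

struct Uf {
    parent: Vec<usize>,
}

impl Uf {
    fn new(n: usize) -> Self {
        Uf { parent: (0..n).collect() }
    }
    fn find(&mut self, x: usize) -> usize {
        if self.parent[x] != x {
            let root = self.find(self.parent[x]);
            self.parent[x] = root; // path compression
        }
        self.parent[x]
    }
    fn union(&mut self, a: usize, b: usize) {
        let (ra, rb) = (self.find(a), self.find(b));
        if ra != rb {
            self.parent[ra] = rb;
        }
    }
}

fn main() {
    // Three VLRs; moves connect 0 <-> 1 and 1 <-> 2, so all three coalesce.
    let mut uf = Uf::new(3);
    uf.union(0, 1);
    uf.union(1, 2);
    assert_eq!(uf.find(0), uf.find(2));
}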

@@ -6,61 +6,62 @@
use std::cmp::Ordering;
use std::fmt;

use crate::avl_tree::AVLTree;
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::data_structures::{
    cmp_range_frags, RangeFrag, RangeFragIx, SortedRangeFragIxs, SortedRangeFrags, TypedIxVec,
    VirtualRangeIx,
    cmp_range_frags, InstPoint, RangeFrag, RangeFragIx, RangeId, SortedRangeFragIxs,
    SortedRangeFrags, TypedIxVec,
};

//=============================================================================
// Per-real-register commitment maps
//

// Something that pairs a fragment index with the index of the virtual range
// to which this fragment conceptually "belongs", at least for the purposes of
// this commitment map. Alternatively, the `vlrix` field may be None, which
// indicates that the associated fragment belongs to a real-reg live range and
// is therefore non-evictable.
// Something that pairs a fragment index with the identity of the virtual or real range to which
// this fragment conceptually "belongs", at least for the purposes of this commitment map. If
// the `lr_id` field denotes a real range, the associated fragment belongs to a real-reg live
// range and is therefore non-evictable. The identity of the range is necessary because:
//
// (A fragment merely denotes a sequence of instruction (points), but within
// the context of a commitment map for a real register, obviously any
// particular fragment can't be part of two different virtual live ranges.)
// * for VirtualRanges, (1) we may need to evict the mapping, so we will need to get hold of the
//   VirtualRange, so that we have all fragments of the VirtualRange to hand, and (2) if the
//   client requires stackmaps, we need to look at the VirtualRange to see if it is reftyped.
//
// Note that we don't intend to actually use the PartialOrd methods for
// FIxAndVLRix. However, they need to exist since we want to construct an
// AVLTree<FIxAndVLRix>, and that requires PartialOrd for its element type.
// For working with such trees we will supply our own comparison function;
// hence PartialOrd here serves only to placate the typechecker. It should
// never actually be used.
// * for RealRanges, only (2) applies; (1) is irrelevant since RealRange assignments are
//   non-evictable.
//
// (A fragment merely denotes a sequence of instruction (points), but within the context of a
// commitment map for a real register, obviously any particular fragment can't be part of two
// different virtual live ranges.)
//
// Note that we don't intend to actually use the PartialOrd methods for RangeFragAndRangeId.
// However, they need to exist since we want to construct an AVLTree<RangeFragAndRangeId>, and
// that requires PartialOrd for its element type. For working with such trees we will supply
// our own comparison function; hence PartialOrd here serves only to placate the typechecker.
// It should never actually be used.
#[derive(Clone)]
pub struct RangeFragAndVLRIx {
pub struct RangeFragAndRangeId {
    pub frag: RangeFrag,
    pub mb_vlrix: Option<VirtualRangeIx>,
    pub id: RangeId,
}
impl RangeFragAndVLRIx {
    fn new(frag: RangeFrag, mb_vlrix: Option<VirtualRangeIx>) -> Self {
        Self { frag, mb_vlrix }
impl RangeFragAndRangeId {
    fn new(frag: RangeFrag, id: RangeId) -> Self {
        Self { frag, id }
    }
}
impl PartialEq for RangeFragAndVLRIx {
impl PartialEq for RangeFragAndRangeId {
    fn eq(&self, _other: &Self) -> bool {
        // See comments above.
        panic!("impl PartialEq for RangeFragAndVLRIx: should never be used");
        panic!("impl PartialEq for RangeFragAndRangeId: should never be used");
    }
}
impl PartialOrd for RangeFragAndVLRIx {
impl PartialOrd for RangeFragAndRangeId {
    fn partial_cmp(&self, _other: &Self) -> Option<Ordering> {
        // See comments above.
        panic!("impl PartialOrd for RangeFragAndVLRIx: should never be used");
        panic!("impl PartialOrd for RangeFragAndRangeId: should never be used");
    }
}
impl fmt::Debug for RangeFragAndVLRIx {
impl fmt::Debug for RangeFragAndRangeId {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let vlrix_string = match self.mb_vlrix {
            None => "NONE".to_string(),
            Some(vlrix) => format!("{:?}", vlrix),
        };
        write!(fmt, "(FnV {:?} {})", self.frag, vlrix_string)
        write!(fmt, "(FnV {:?} {:?})", self.frag, self.id)
    }
}
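
The panicking PartialEq/PartialOrd impls above are exactly the "placate the typechecker" pattern the comment describes: the trait bound is satisfied syntactically, while all real ordering goes through an explicitly supplied comparison function. The same discipline sketched against a sorted Vec (`insert_sorted` is a hypothetical helper, not part of this crate):

use std::cmp::Ordering;

#[derive(Clone)]
struct Elem {
    key: u32,
    payload: u32,
}

// The bound is satisfied, but any accidental use panics loudly.
impl PartialEq for Elem {
    fn eq(&self, _: &Self) -> bool {
        panic!("Elem::eq: should never be used");
    }
}
impl PartialOrd for Elem {
    fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
        panic!("Elem::partial_cmp: should never be used");
    }
}

// All real ordering goes through an explicit comparator.
fn insert_sorted(v: &mut Vec<Elem>, e: Elem, cmp: &dyn Fn(&Elem, &Elem) -> Ordering) {
    let pos = v.partition_point(|x| cmp(x, &e) == Ordering::Less);
    v.insert(pos, e);
}

fn main() {
    let by_key = |a: &Elem, b: &Elem| a.key.cmp(&b.key);
    let mut v = Vec::new();
    insert_sorted(&mut v, Elem { key: 2, payload: 20 }, &by_key);
    insert_sorted(&mut v, Elem { key: 1, payload: 10 }, &by_key);
    assert_eq!(v[0].payload, 10); // sorted by key; payloads untouched
}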

@@ -70,13 +71,10 @@ impl fmt::Debug for RangeFragAndVLRIx {

// This indicates the current set of fragments to which some real register is
// currently "committed". The fragments *must* be non-overlapping. Hence
// they form a total order, and so they must appear in the vector sorted by
// that order.
//
// Overall this is identical to SortedRangeFragIxs, except extended so that
// each FragIx is tagged with an Option<VirtualRangeIx>.
// they form a total order, and so we may validly build an AVL tree of them.

pub struct CommitmentMap {
    pub tree: AVLTree<RangeFragAndVLRIx>,
    pub tree: AVLTree<RangeFragAndRangeId>,
}
impl fmt::Debug for CommitmentMap {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
@@ -88,27 +86,20 @@ impl fmt::Debug for CommitmentMap {
impl CommitmentMap {
    pub fn new() -> Self {
        // The AVL tree constructor needs a default value for the elements. It
        // will never be used. The not-present index value will show as
        // will never be used. The RangeId index value will show as
        // obviously bogus if we ever try to "dereference" any part of it.
        let dflt = RangeFragAndVLRIx::new(
            RangeFrag::invalid_value(),
            Some(VirtualRangeIx::invalid_value()),
        );
        let dflt = RangeFragAndRangeId::new(RangeFrag::invalid_value(), RangeId::invalid_value());
        Self {
            tree: AVLTree::<RangeFragAndVLRIx>::new(dflt),
            tree: AVLTree::<RangeFragAndRangeId>::new(dflt),
        }
    }

    pub fn add(
        &mut self,
        to_add_frags: &SortedRangeFrags,
        to_add_mb_vlrix: Option<VirtualRangeIx>,
    ) {
    pub fn add(&mut self, to_add_frags: &SortedRangeFrags, to_add_lr_id: RangeId) {
        for frag in &to_add_frags.frags {
            let to_add = RangeFragAndVLRIx::new(frag.clone(), to_add_mb_vlrix);
            let to_add = RangeFragAndRangeId::new(frag.clone(), to_add_lr_id);
            let added = self.tree.insert(
                to_add,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -121,14 +112,14 @@ impl CommitmentMap {
    pub fn add_indirect(
        &mut self,
        to_add_frags: &SortedRangeFragIxs,
        to_add_mb_vlrix: Option<VirtualRangeIx>,
        to_add_lr_id: RangeId,
        frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    ) {
        for fix in &to_add_frags.frag_ixs {
            let to_add = RangeFragAndVLRIx::new(frag_env[*fix].clone(), to_add_mb_vlrix);
            let to_add = RangeFragAndRangeId::new(frag_env[*fix].clone(), to_add_lr_id);
            let added = self.tree.insert(
                to_add,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -140,12 +131,12 @@ impl CommitmentMap {

    pub fn del(&mut self, to_del_frags: &SortedRangeFrags) {
        for frag in &to_del_frags.frags {
            // re None: we don't care what the VLRIx is, since we're deleting by
            // RangeFrags alone.
            let to_del = RangeFragAndVLRIx::new(frag.clone(), None);
            // re RangeId::invalid_value(): we don't care what the RangeId is, since we're
            // deleting by RangeFrags alone.
            let to_del = RangeFragAndRangeId::new(frag.clone(), RangeId::invalid_value());
            let deleted = self.tree.delete(
                to_del,
                Some(&|pair1: RangeFragAndVLRIx, pair2: RangeFragAndVLRIx| {
                Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
                    cmp_range_frags(&pair1.frag, &pair2.frag)
                }),
            );
@@ -154,4 +145,26 @@ impl CommitmentMap {
            assert!(deleted);
        }
    }

    // Find the RangeId for the RangeFrag that overlaps `pt`, if one exists.
    // This is conceptually equivalent to LogicalSpillSlot::get_refness_at_inst_point.
    pub fn lookup_inst_point(&self, pt: InstPoint) -> Option<RangeId> {
        let mut root = self.tree.root;
        while root != AVL_NULL {
            let root_node = &self.tree.pool[root as usize];
            let root_item = &root_node.item;
            if pt < root_item.frag.first {
                // `pt` is to the left of the `root`. So there's no
                // overlap with `root`. Continue by inspecting the left subtree.
                root = root_node.left;
            } else if root_item.frag.last < pt {
                // Ditto for the right subtree.
                root = root_node.right;
            } else {
                // `pt` overlaps the `root`, so we have what we want.
                return Some(root_item.id);
            }
        }
        None
    }
}
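
`lookup_inst_point` above works only because committed fragments never overlap: at every node, "left of", "right of" and "contains" are mutually exclusive, so the walk discards half the tree per step. The same query against a sorted Vec with binary search, as an assumed simplification of the AVL walk:

// Inclusive, non-overlapping intervals, sorted by `first`.
struct Frag {
    first: u32,
    last: u32,
}

fn lookup(sorted_frags: &[Frag], pt: u32) -> Option<usize> {
    let (mut lo, mut hi) = (0usize, sorted_frags.len());
    while lo < hi {
        let mid = (lo + hi) / 2;
        let f = &sorted_frags[mid];
        if pt < f.first {
            hi = mid; // `pt` is to the left; look in the left half
        } else if f.last < pt {
            lo = mid + 1; // ditto for the right half
        } else {
            return Some(mid); // `pt` overlaps this fragment
        }
    }
    None
}

fn main() {
    let frags = vec![Frag { first: 0, last: 4 }, Frag { first: 10, last: 12 }];
    assert_eq!(lookup(&frags, 11), Some(1));
    assert_eq!(lookup(&frags, 7), None);
}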

@@ -12,18 +12,21 @@ use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_
use crate::analysis_main::{run_analysis, AnalysisInfo};
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndVLRIx};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndRangeId};
use crate::bt_spillslot_allocator::SpillSlotAllocator;
use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
use crate::data_structures::{
    BlockIx, InstIx, InstPoint, Point, RangeFrag, RangeFragIx, RealRange, RealReg, RealRegUniverse,
    Reg, RegVecBounds, RegVecs, Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec,
    VirtualRange, VirtualRangeIx, VirtualReg, Writable,
    BlockIx, InstIx, InstPoint, Map, Point, RangeFrag, RangeFragIx, RangeId, RealRange,
    RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegVecBounds, RegVecs, RegVecsAndBounds,
    Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec, VirtualRange, VirtualRangeIx,
    VirtualReg, Writable,
};
use crate::inst_stream::{
    edit_inst_stream, ExtPoint, InstExtPoint, InstToInsert, InstToInsertAndExtPoint,
};
use crate::inst_stream::{edit_inst_stream, InstToInsert, InstToInsertAndPoint};
use crate::sparse_set::SparseSetU;
use crate::union_find::UnionFindEquivClasses;
use crate::{Function, RegAllocError, RegAllocResult};
use crate::{AlgorithmWithDefaults, Function, RegAllocError, RegAllocResult, StackmapRequestInfo};

#[derive(Clone)]
pub struct BacktrackingOptions {
@@ -75,12 +78,21 @@ impl PerRealReg {
    }

    #[inline(never)]
    fn add_RealRange(&mut self, to_add: &RealRange, frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) {
        // Commit this register to `to_add`, irrevocably. Don't add it to
        // `vlrixs_assigned` since we will never want to later evict the
        // assignment.
        self.committed
            .add_indirect(&to_add.sorted_frags, None, frag_env);
    fn add_RealRange(
        &mut self,
        to_add_rlrix: RealRangeIx,
        rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
        frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    ) {
        // Commit this register to `to_add`, irrevocably. Don't add it to `vlrixs_assigned`
        // since we will never want to later evict the assignment. (Also, from a types point of
        // view that would be impossible.)
        let to_add_rlr = &rlr_env[to_add_rlrix];
        self.committed.add_indirect(
            &to_add_rlr.sorted_frags,
            RangeId::new_real(to_add_rlrix),
            frag_env,
        );
    }
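
`RangeId::new_real` / `RangeId::new_virtual` fold the real-vs-virtual distinction into a single index type, replacing the previous Option<VirtualRangeIx>. A sketch of one plausible encoding, using the top bit as the tag -- an assumption for illustration, the crate's actual encoding may differ:

#[derive(Clone, Copy, PartialEq, Debug)]
struct RangeIdSketch(u32);

impl RangeIdSketch {
    const REAL_BIT: u32 = 1 << 31;
    fn new_real(ix: u32) -> Self {
        RangeIdSketch(ix | Self::REAL_BIT)
    }
    fn new_virtual(ix: u32) -> Self {
        RangeIdSketch(ix)
    }
    fn is_real(self) -> bool {
        self.0 & Self::REAL_BIT != 0
    }
    fn index(self) -> u32 {
        self.0 & !Self::REAL_BIT
    }
}

fn main() {
    let r = RangeIdSketch::new_real(7);
    let v = RangeIdSketch::new_virtual(7);
    assert!(r.is_real() && !v.is_real());
    assert_eq!(r.index(), v.index());
}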

    #[inline(never)]
@@ -91,7 +103,7 @@ impl PerRealReg {
    ) {
        let to_add_vlr = &vlr_env[to_add_vlrix];
        self.committed
            .add(&to_add_vlr.sorted_frags, Some(to_add_vlrix));
            .add(&to_add_vlr.sorted_frags, RangeId::new_virtual(to_add_vlrix));
        assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
        self.vlrixs_assigned.insert(to_add_vlrix);
    }
@@ -103,6 +115,8 @@ impl PerRealReg {
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        // Remove it from `vlrixs_assigned`
        // FIXME 2020June18: we could do this more efficiently by inspecting
        // the return value from `delete`.
        if self.vlrixs_assigned.contains(to_del_vlrix) {
            self.vlrixs_assigned.delete(to_del_vlrix);
        } else {
@@ -130,7 +144,7 @@ fn search_commitment_tree<IsAllowedToEvict>(
    running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
    running_cost: &mut SpillCost,
    // The tree to search.
    tree: &AVLTree<RangeFragAndVLRIx>,
    tree: &AVLTree<RangeFragAndRangeId>,
    // The RangeFrag we want to accommodate.
    pair_frag: &RangeFrag,
    spill_cost_budget: &SpillCost,
@@ -156,14 +170,14 @@ where
    // Let's first consider the current node. If we need it but it's not
    // evictable, we might as well stop now.
    if overlaps_curr {
        // This frag has no associated VirtualRangeIx, so it is part of a
        // RealRange, and hence not evictable.
        if curr_node_item.mb_vlrix.is_none() {
        // This frag is committed to a real range, not a virtual one, and hence is not
        // evictable.
        if curr_node_item.id.is_real() {
            return false;
        }
        // Maybe this one is a spill range, in which case, it can't be
        // evicted.
        let vlrix_to_evict = curr_node_item.mb_vlrix.unwrap();
        let vlrix_to_evict = curr_node_item.id.to_virtual();
        let vlr_to_evict = &vlr_env[vlrix_to_evict];
        if vlr_to_evict.spill_cost.is_infinite() {
            return false;
@@ -368,6 +382,180 @@ fn print_RA_state(
    debug!(">>>>");
}

//=============================================================================
// Reftype/stackmap support

// This creates the artefacts for a safepoint/stackmap at some insn `iix`: the set of reftyped
// spill slots, the spills to be placed at `iix.r` (yes, you read that right) and the reloads to
// be placed at `iix.s`.
//
// This consults:
//
// * the commitment maps, to figure out which real registers are live and reftyped at `iix.u`.
//
// * the spillslot allocator, to figure out which spill slots are live and reftyped at `iix.u`.
//
// This may fail, meaning the request is in some way nonsensical; failure is propagated upwards.

fn get_stackmap_artefacts_at(
    spill_slot_allocator: &mut SpillSlotAllocator,
    univ: &RealRegUniverse,
    reftype_class: RegClass,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    per_real_reg: &Vec<PerRealReg>,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    iix: InstIx,
) -> Result<(Vec<InstToInsert>, Vec<InstToInsert>, Vec<SpillSlot>), RegAllocError> {
    // From a code generation perspective, what we need to compute is:
    //
    // * Sbefore: real regs that are live at `iix.u`, that are reftypes
    //
    // * Safter: Sbefore - real regs written by `iix`
    //
    // Then:
    //
    // * for r in Sbefore . add "spill r" at `iix.r` *after* all the reloads that are already
    //   there
    //
    // * for r in Safter . add "reload r" at `iix.s` *before* all the spills that are already
    //   there
    //
    // Once those spills have been "recorded" by the `spill_slot_allocator`, we can then ask it
    // to tell us all the reftyped spill slots at `iix.u`, and that's our stackmap! This routine
    // only computes the stackmap and the vectors of spills and reloads. It doesn't deal with
    // interleaving them into the final code sequence.
    //
    // Note that this scheme isn't as runtime-inefficient as it sounds, at least in the
    // SpiderMonkey use case and where `iix` is a call insn. That's because SM's calling
    // convention has no callee saved registers. Hence "real regs written by `iix`" will be
    // "all real regs" and so Safter will be empty. And Sbefore is in any case pretty small.
    //
    // (/me thinks ..) hmm, if Safter is empty, then what is the point of dumping Sbefore on the
    // stack before the GC? For r in Sbefore, either r is the only reference to some object, in
    // which case there's no point in presenting that ref to the GC since r is dead after call,
    // or r isn't the only ref to the object, in which case some other ref to it must exist
    // elsewhere in the stack, and that will keep the object alive. Maybe this needs a rethink.
    // Maybe the spills before the call should be only for the set Safter?

    let pt = InstPoint::new_use(iix);

    // Compute Sbefore.

    // FIXME change this to SparseSet
    let mut s_before = Set::<RealReg>::empty();

    let rci = univ.allocable_by_class[reftype_class.rc_to_usize()];
    if rci.is_none() {
        return Err(RegAllocError::Other(
            "stackmap request: no regs in specified reftype class".to_string(),
        ));
    }
    let rci = rci.unwrap();

    debug!("computing stackmap info at {:?}", pt);

    for rreg_no in rci.first..rci.last + 1 {
        // Get the RangeId, if any, assigned for `rreg_no` at `iix.u`. From that we can figure
        // out if it is reftyped.
        let mb_range_id = per_real_reg[rreg_no].committed.lookup_inst_point(pt);
        if let Some(range_id) = mb_range_id {
            // `rreg_no` is live at `iix.u`.
            let is_ref = if range_id.is_real() {
                debug!(
                    " real reg {:?} is real-range {:?}",
                    rreg_no,
                    rlr_env[range_id.to_real()]
                );
                rlr_env[range_id.to_real()].is_ref
            } else {
                debug!(
                    " real reg {:?} is virtual-range {:?}",
                    rreg_no,
                    vlr_env[range_id.to_virtual()]
                );
                vlr_env[range_id.to_virtual()].is_ref
            };
            if is_ref {
                // Finally .. we know that `rreg_no` is reftyped and live at `iix.u`.
                let rreg = univ.regs[rreg_no].0;
                s_before.insert(rreg);
            }
        }
    }

    debug!("Sbefore = {:?}", s_before);

    // Compute Safter.

    let mut s_after = s_before.clone();
    let bounds = &reg_vecs_and_bounds.bounds[iix];
    if bounds.mods_len != 0 {
        // Only the GC is allowed to modify reftyped regs at this insn!
        return Err(RegAllocError::Other(
            "stackmap request: safepoint insn modifies a reftyped reg".to_string(),
        ));
    }

    for i in bounds.defs_start..bounds.defs_start + bounds.defs_len as u32 {
        let r_defd = reg_vecs_and_bounds.vecs.defs[i as usize];
        if r_defd.is_real() && r_defd.get_class() == reftype_class {
            s_after.delete(r_defd.to_real_reg());
        }
    }

    debug!("Safter = {:?}", s_after);

    // Create the spill insns, as defined by Sbefore. This has the side effect of recording the
    // spill in `spill_slot_allocator`, so we can later ask it to tell us all the reftyped spill
    // slots.

    let frag = RangeFrag::new(InstPoint::new_reload(iix), InstPoint::new_spill(iix));

    let mut spill_insns = Vec::<InstToInsert>::new();
    let mut where_reg_got_spilled_to = Map::<RealReg, SpillSlot>::default();

    for from_reg in s_before.iter() {
        let to_slot = spill_slot_allocator.alloc_reftyped_spillslot_for_frag(frag.clone());
        let spill = InstToInsert::Spill {
            to_slot,
            from_reg: *from_reg,
            for_vreg: None, // spill isn't associated with any virtual reg
        };
        spill_insns.push(spill);
        // We also need to remember where we stashed it, so we can reload it, if it is in Safter.
        if s_after.contains(*from_reg) {
            where_reg_got_spilled_to.insert(*from_reg, to_slot);
        }
    }

    // Create the reload insns, as defined by Safter. Except, we might as well use the map we
    // just made, since its domain is the same as Safter.

    let mut reload_insns = Vec::<InstToInsert>::new();

    for (to_reg, from_slot) in where_reg_got_spilled_to.iter() {
        let reload = InstToInsert::Reload {
            to_reg: Writable::from_reg(*to_reg),
            from_slot: *from_slot,
            for_vreg: None, // reload isn't associated with any virtual reg
        };
        reload_insns.push(reload);
    }

    // And finally .. round up all the reftyped spill slots. That includes both "normal" spill
    // slots that happen to hold reftyped values, as well as the "extras" we created here, to
    // hold values of reftyped regs that are live over this instruction.

    let reftyped_spillslots = spill_slot_allocator.get_reftyped_spillslots_at_inst_point(pt);

    debug!("reftyped_spillslots = {:?}", reftyped_spillslots);

    // And we're done!

    Ok((spill_insns, reload_insns, reftyped_spillslots))
}
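
The Sbefore/Safter rule implemented above reduces to simple set algebra. A worked sketch with u32 register numbers standing in for RealRegs (hypothetical simplification):

use std::collections::HashSet;

fn stackmap_sets(
    live_reftyped_at_use: &HashSet<u32>, // Sbefore
    defined_by_insn: &HashSet<u32>,
) -> (HashSet<u32>, HashSet<u32>) {
    let s_before = live_reftyped_at_use.clone();
    // Safter = Sbefore minus the regs the instruction writes.
    let s_after: HashSet<u32> = s_before.difference(defined_by_insn).copied().collect();
    (s_before, s_after)
}

fn main() {
    let live: HashSet<u32> = [1, 2, 3].into_iter().collect();
    let defd: HashSet<u32> = [2].into_iter().collect();
    let (sb, sa) = stackmap_sets(&live, &defd);
    assert!(sb.contains(&2)); // spilled before the safepoint ...
    assert!(!sa.contains(&2)); // ... but not reloaded after it
    assert!(sa.contains(&1) && sa.contains(&3));
}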

//=============================================================================
// Allocator top level

@@ -471,9 +659,23 @@ impl fmt::Debug for EditListItem {
pub fn alloc_main<F: Function>(
    func: &mut F,
    reg_universe: &RealRegUniverse,
    stackmap_request: Option<&StackmapRequestInfo>,
    use_checker: bool,
    opts: &BacktrackingOptions,
) -> Result<RegAllocResult<F>, RegAllocError> {
    // -------- Initial arrangements for stackmaps --------
    let empty_vec_vregs = vec![];
    let empty_vec_iixs = vec![];
    let (client_wants_stackmaps, reftype_class, reftyped_vregs, safepoint_insns) =
        match stackmap_request {
            Some(&StackmapRequestInfo {
                reftype_class,
                ref reftyped_vregs,
                ref safepoint_insns,
            }) => (true, reftype_class, reftyped_vregs, safepoint_insns),
            None => (false, RegClass::INVALID, &empty_vec_vregs, &empty_vec_iixs),
        };

    // -------- Perform initial liveness analysis --------
    // Note that the analysis phase can fail; hence we propagate any error.
    let AnalysisInfo {
@@ -484,26 +686,38 @@ pub fn alloc_main<F: Function>(
        range_metrics: frag_metrics_env,
        estimated_frequencies: est_freqs,
        inst_to_block_map,
        ..
    } = run_analysis(func, reg_universe).map_err(|err| RegAllocError::Analysis(err))?;
        reg_to_ranges_maps: mb_reg_to_ranges_maps,
        move_info: mb_move_info,
    } = run_analysis(
        func,
        reg_universe,
        AlgorithmWithDefaults::Backtracking,
        client_wants_stackmaps,
        reftype_class,
        reftyped_vregs,
    )
    .map_err(|err| RegAllocError::Analysis(err))?;

    assert!(reg_vecs_and_bounds.is_sanitized());
    assert!(frag_env.len() == frag_metrics_env.len());
    assert!(mb_reg_to_ranges_maps.is_some()); // ensured by `run_analysis`
    assert!(mb_move_info.is_some()); // ensured by `run_analysis`
    let reg_to_ranges_maps = mb_reg_to_ranges_maps.unwrap();
    let move_info = mb_move_info.unwrap();

    // Also perform analysis that finds all coalesing opportunities.
    // Also perform analysis that finds all coalescing opportunities.
    let coalescing_info = do_coalescing_analysis(
        func,
        &reg_vecs_and_bounds,
        &reg_universe,
        &rlr_env,
        &mut vlr_env,
        &frag_env,
        &est_freqs,
        &reg_universe,
        &reg_to_ranges_maps,
        &move_info,
    );
    let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
    let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
    let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
    let vreg_to_vlrs_map: Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>> = coalescing_info.3;
    assert!(hints.len() == vlr_env.len());

    // -------- Alloc main --------
@@ -533,7 +747,8 @@ pub fn alloc_main<F: Function>(
        // PerRealReg
        per_real_reg.push(PerRealReg::new());
    }
    for rlr in rlr_env.iter() {
    for (rlrix_no, rlr) in rlr_env.iter().enumerate() {
        let rlrix = RealRangeIx::new(rlrix_no as u32);
        let rregIndex = rlr.rreg.get_index();
        // Ignore RealRanges for RealRegs that are not part of the allocatable
        // set. As far as the allocator is concerned, such RealRegs simply
@@ -541,7 +756,7 @@ pub fn alloc_main<F: Function>(
        if rregIndex >= reg_universe.allocable {
            continue;
        }
        per_real_reg[rregIndex].add_RealRange(&rlr, &frag_env);
        per_real_reg[rregIndex].add_RealRange(rlrix, &rlr_env, &frag_env);
    }

    let mut edit_list_move = Vec::<EditListItem>::new();
@@ -977,6 +1192,7 @@ pub fn alloc_main<F: Function>(

    let curr_vlr_vreg = curr_vlr.vreg;
    let curr_vlr_reg = curr_vlr_vreg.to_reg();
    let curr_vlr_is_ref = curr_vlr.is_ref;

    for frag in &curr_vlr.sorted_frags.frags {
        for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
@@ -1060,6 +1276,21 @@ pub fn alloc_main<F: Function>(
    }
    let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();

    // If we're spilling a reffy VLR, we'll need to tell the spillslot allocator that. The
    // VLR will already have been allocated to some spill slot, and relevant RangeFrags in
    // the slot should have already been reserved for it, by the above call to
    // `alloc_spill_slots` (although possibly relating to a prior VLR in the same
    // equivalence class, and not this one). However, those RangeFrags will have all been
    // marked non-reffy, because we don't know, in general, at spillslot-allocation-time,
    // whether a VLR will actually be spilled, and we don't want the resulting stack maps to
    // mention stack entries which are dead at the point of the safepoint insn. Hence the
    // need to update those RangeFrags pertaining to just this VLR -- now that we *know*
    // it's going to be spilled.
    if curr_vlr.is_ref {
        spill_slot_allocator
            .notify_spillage_of_reftyped_vlr(spill_slot_to_use, &curr_vlr.sorted_frags);
    }

    for sri in sri_vec {
        let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
            BridgeKind::RtoU => (Point::Reload, Point::Use),
@@ -1076,6 +1307,7 @@ pub fn alloc_main<F: Function>(
            vreg: curr_vlr_vreg,
            rreg: None,
            sorted_frags: new_vlr_sfrags,
            is_ref: curr_vlr_is_ref, // "inherit" refness
            size: 1,
            // Effectively infinite. We'll never look at this again anyway.
            total_cost: 0xFFFF_FFFFu32,
@@ -1109,7 +1341,7 @@ pub fn alloc_main<F: Function>(
            // allocated to the same reg as the destination of the
            // move. That means we have to find the VLR that owns
            // the destination vreg.
            for vlrix in &vreg_to_vlrs_map[dst_vreg.get_index()] {
            for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[dst_vreg.get_index()] {
                if vlr_env[*vlrix].vreg == dst_vreg {
                    new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                    break;
@@ -1120,7 +1352,7 @@ pub fn alloc_main<F: Function>(
            // Def-to-Spill bridge. Hint that we want to be
            // allocated to the same reg as the source of the
            // move.
            for vlrix in &vreg_to_vlrs_map[src_vreg.get_index()] {
            for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[src_vreg.get_index()] {
                if vlr_env[*vlrix].vreg == src_vreg {
                    new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                    break;
@@ -1315,7 +1547,7 @@ pub fn alloc_main<F: Function>(
    // Reload and spill instructions are missing. To generate them, go through
    // the "edit list", which contains info on both how to generate the
    // instructions, and where to insert them.
    let mut spills_n_reloads = Vec::<InstToInsertAndPoint>::new();
    let mut spills_n_reloads = Vec::<InstToInsertAndExtPoint>::new();
    let mut num_spills = 0; // stats only
    let mut num_reloads = 0; // stats only
    for eli in &edit_list_other {
@@ -1334,10 +1566,10 @@ pub fn alloc_main<F: Function>(
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToR = vlr_frag.first;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                num_reloads += 1;
            }
            BridgeKind::RtoS => {
@@ -1347,17 +1579,17 @@ pub fn alloc_main<F: Function>(
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToR = vlr_frag.first;
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = vlr_frag.last;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
                spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_reloads += 1;
                num_spills += 1;
            }
@@ -1368,10 +1600,10 @@ pub fn alloc_main<F: Function>(
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: vreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = vlr_frag.last;
                spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_spills += 1;
            }
        }
@@ -1408,10 +1640,64 @@ pub fn alloc_main<F: Function>(
        }
    }

    // There is one of these for every entry in `safepoint_insns`.
    let mut stackmaps = Vec::<Vec<SpillSlot>>::new();

    if !safepoint_insns.is_empty() {
        info!("alloc_main: create safepoints and stackmaps");
        for safepoint_iix in safepoint_insns {
            // Create the stackmap artefacts for `safepoint_iix`. Save the stackmap (the
            // reftyped spillslots); we'll have to return it to the client as part of the
            // overall allocation result. The extra spill and reload instructions can simply
            // be added to `spills_n_reloads` though, and `edit_inst_stream` will correctly
            // merge them in.
            //
            // Note: this modifies `spill_slot_allocator`, since at this point we have to
            // allocate spill slots to hold reftyped real regs across the safepoint insn.
            //
            // Because the SB (spill-before) and RA (reload-after) `ExtPoint`s are "closer" to
            // the "core" of an instruction than the R (reload) and S (spill) `ExtPoint`s, any
            // "normal" reload or spill ranges that are reftyped will be handled correctly.
            // From `get_stackmap_artefacts_at`s point of view, such spill/reload ranges are
            // just like any other real-reg live range that it will have to spill around the
            // safepoint. The fact that they are for spills or reloads doesn't make any
            // difference.
            //
            // Note also: this call can fail; failure is propagated upwards.
            //
            // FIXME Passing these 3 small vectors around is inefficient. Use SmallVec or
            // (better) owned-by-this-function vectors instead.
            let (spills_before, reloads_after, reftyped_spillslots) = get_stackmap_artefacts_at(
                &mut spill_slot_allocator,
                &reg_universe,
                reftype_class,
                &reg_vecs_and_bounds,
                &per_real_reg,
                &rlr_env,
                &vlr_env,
                *safepoint_iix,
            )?;
            stackmaps.push(reftyped_spillslots);
            for spill_before in spills_before {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    spill_before,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::SpillBefore),
                ));
            }
            for reload_after in reloads_after {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    reload_after,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::ReloadAfter),
                ));
            }
        }
    }
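
The "closer to the core" remark above is about the relative order of insertion points within a single instruction. A sketch, assuming the six-way ordering Reload < SpillBefore < Use < Def < ReloadAfter < Spill for these extension points:

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum ExtPt {
    Reload,
    SpillBefore,
    Use,
    Def,
    ReloadAfter,
    Spill,
}

fn main() {
    // Safepoint spills/reloads land *inside* the ordinary reload/spill slots
    // for the same instruction, so "normal" reload and spill code stays
    // outermost and is seen by the stackmap machinery like any other range.
    assert!(ExtPt::Reload < ExtPt::SpillBefore);
    assert!(ExtPt::ReloadAfter < ExtPt::Spill);
}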

    info!("alloc_main: edit_inst_stream");

    let final_insns_and_targetmap__or_err = edit_inst_stream(
    let final_insns_and_targetmap_and_new_safepoints__or_err = edit_inst_stream(
        func,
        &safepoint_insns,
        spills_n_reloads,
        &iixs_to_nop_out,
        frag_map,
@@ -1423,7 +1709,7 @@ pub fn alloc_main<F: Function>(

    // ======== BEGIN Create the RegAllocResult ========

    match final_insns_and_targetmap__or_err {
    match final_insns_and_targetmap_and_new_safepoints__or_err {
        Ok((ref final_insns, ..)) => {
            info!(
                "alloc_main: out: VLRs: {} initially, {} processed",
@@ -1450,16 +1736,17 @@ pub fn alloc_main<F: Function>(
        }
    }

    let (final_insns, target_map, orig_insn_map) = match final_insns_and_targetmap__or_err {
        Err(e) => {
            info!("alloc_main: fail");
            return Err(e);
        }
        Ok(pair) => {
            info!("alloc_main: creating RegAllocResult");
            pair
        }
    };
    let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
        match final_insns_and_targetmap_and_new_safepoints__or_err {
            Err(e) => {
                info!("alloc_main: fail");
                return Err(e);
            }
            Ok(quad) => {
                info!("alloc_main: creating RegAllocResult");
                quad
            }
        };

    // Compute clobbered registers with one final, quick pass.
    //
@@ -1475,7 +1762,7 @@ pub fn alloc_main<F: Function>(

    let mut clobbered_registers: Set<RealReg> = Set::empty();

    // We'll dump all the reg uses in here. We don't care the bounds, so just
    // We'll dump all the reg uses in here. We don't care about the bounds, so just
    // pass a dummy one in the loop.
    let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
    let mut dummy_bounds = RegVecBounds::new();
@@ -1509,13 +1796,17 @@ pub fn alloc_main<F: Function>(
        block_annotations = Some(anns);
    }

    assert!(stackmaps.len() == safepoint_insns.len());
    assert!(new_safepoint_insns.len() == safepoint_insns.len());
    let ra_res = RegAllocResult {
        insns: final_insns,
        target_map,
        orig_insn_map,
        orig_insn_map: new_to_old_insn_map,
        clobbered_registers,
        num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
        block_annotations,
        stackmaps,
        new_safepoint_insns,
    };

    info!("alloc_main: end");

@@ -5,7 +5,7 @@

use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::data_structures::{
    cmp_range_frags, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
    cmp_range_frags, InstPoint, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
    VirtualRangeIx,
};
use crate::union_find::UnionFindEquivClasses;
@@ -28,6 +28,22 @@ use crate::Function;
//=============================================================================
// Logical spill slots

// In the trees, we keep track of which frags are reftyped, so we can later create stackmaps by
// slicing all of the trees at some `InstPoint`. Unfortunately this requires storing 65 bits of
// data in each node -- 64 bits for the RangeFrag and 1 bit for the reftype. A TODO would be to
// steal one bit from the RangeFrag. For now though, we do the simple thing.

#[derive(Clone, PartialEq, PartialOrd)]
struct RangeFragAndRefness {
    frag: RangeFrag,
    is_ref: bool,
}
impl RangeFragAndRefness {
    fn new(frag: RangeFrag, is_ref: bool) -> Self {
        Self { frag, is_ref }
    }
}
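
A quick check of the space cost mentioned in the comment above: a 64-bit fragment plus a one-bit flag rounds up to 16 bytes per element under default struct layout (u64 here is an assumed stand-in for the 64-bit RangeFrag):

struct FragAndRef {
    frag: u64,    // stand-in for the 64-bit RangeFrag
    is_ref: bool, // the extra "65th" bit
}

fn main() {
    // 8 bytes + 1 byte, padded out to the u64's 8-byte alignment.
    assert_eq!(std::mem::size_of::<FragAndRef>(), 16);
}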

// We keep one of these for every "logical spill slot" in use.
enum LogicalSpillSlot {
    // This slot is in use and can hold values of size `size` (only). Note that
@@ -36,7 +52,10 @@ enum LogicalSpillSlot {
    // `SpillSlotAllocator::slots`, the next `size` - 1 entries must be
    // `Unavail`. This is a hard invariant, violation of which will cause
    // overlapping spill slots and potential chaos.
    InUse { size: u32, tree: AVLTree<RangeFrag> },
    InUse {
        size: u32,
        tree: AVLTree<RangeFragAndRefness>,
    },
    // This slot is unavailable, as described above. It's unavailable because
    // it holds some part of the values associated with the nearest lower
    // numbered entry which isn't `Unavail`, and that entry must be an `InUse`
@@ -53,13 +72,13 @@ impl LogicalSpillSlot {
    fn is_InUse(&self) -> bool {
        !self.is_Unavail()
    }
    fn get_tree(&self) -> &AVLTree<RangeFrag> {
    fn get_tree(&self) -> &AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_tree"),
        }
    }
    fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFrag> {
    fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFragAndRefness> {
        match self {
            LogicalSpillSlot::InUse { ref mut tree, .. } => tree,
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_mut_tree"),
@@ -71,6 +90,62 @@ impl LogicalSpillSlot {
            LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_size"),
        }
    }
    // If this spill slot is occupied at `pt`, return the refness of the value (VirtualRange)
    // stored in it. This is conceptually equivalent to CommitmentMap::lookup_inst_point.
    fn get_refness_at_inst_point(&self, pt: InstPoint) -> Option<bool> {
        match self {
            LogicalSpillSlot::InUse { size: 1, tree } => {
                // Search the tree to see if a reffy commitment intersects `pt`.
                let mut root = tree.root;
                while root != AVL_NULL {
                    let root_node = &tree.pool[root as usize];
                    let root_item = &root_node.item;
                    if pt < root_item.frag.first {
                        // `pt` is to the left of the `root`. So there's no
                        // overlap with `root`. Continue by inspecting the left subtree.
                        root = root_node.left;
                    } else if root_item.frag.last < pt {
                        // Ditto for the right subtree.
                        root = root_node.right;
                    } else {
                        // `pt` overlaps the `root`, so we have what we want.
                        return Some(root_item.is_ref);
                    }
                }
                None
            }
            LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
                // Slot isn't in use, or is in use but for values of some non-ref size
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HELPER FUNCTION
|
||||
// Find out whether it is possible to add `frag` to `tree`.
|
||||
#[inline(always)]
|
||||
fn ssal_is_add_frag_possible(tree: &AVLTree<RangeFragAndRefness>, frag: &RangeFrag) -> bool {
|
||||
// BEGIN check `frag` for any overlap against `tree`.
|
||||
let mut root = tree.root;
|
||||
while root != AVL_NULL {
|
||||
let root_node = &tree.pool[root as usize];
|
||||
let root_item = &root_node.item;
|
||||
if frag.last < root_item.frag.first {
|
||||
// `frag` is entirely to the left of the `root`. So there's no
|
||||
// overlap with root. Continue by inspecting the left subtree.
|
||||
root = root_node.left;
|
||||
} else if root_item.frag.last < frag.first {
|
||||
// Ditto for the right subtree.
|
||||
root = root_node.right;
|
||||
} else {
|
||||
// `frag` overlaps the `root`. Give up.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// END check `frag` for any overlap against `tree`.
|
||||
// `frag` doesn't overlap.
|
||||
true
|
||||
}
+
+// HELPER FUNCTION

@@ -81,38 +156,23 @@ impl LogicalSpillSlot {
// no guarantee that elements of `frags` don't overlap `tree`. Hence we have
// to do a custom walk of `tree` to check for overlap; we can't just use
// `AVLTree::contains`.
-fn ssal_is_add_possible(tree: &AVLTree<RangeFrag>, frags: &SortedRangeFrags) -> bool {
+fn ssal_is_add_possible(tree: &AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Figure out whether all the frags will go in.
    for frag in &frags.frags {
-        // BEGIN check `frag` for any overlap against `tree`.
-        let mut root = tree.root;
-        while root != AVL_NULL {
-            let root_node = &tree.pool[root as usize];
-            let root_frag = root_node.item.clone();
-            if frag.last < root_frag.first {
-                // `frag` is entirely to the left of the `root`. So there's no
-                // overlap with root. Continue by inspecting the left subtree.
-                root = root_node.left;
-            } else if root_frag.last < frag.first {
-                // Ditto for the right subtree.
-                root = root_node.right;
-            } else {
-                // `frag` overlaps the `root`. Give up.
-                return false;
-            }
-        }
+        if !ssal_is_add_frag_possible(&tree, frag) {
+            return false;
+        }
        // END check `frag` for any overlap against `tree`.
        // `frag` doesn't overlap. Move on to the next one.
    }
    true
}

// HELPER FUNCTION
-// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if
-// not possible. If `false` is returned, `tree` is unchanged (this is
-// important). This routine relies on the fact that SortedFrags is
-// non-overlapping.
-fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags) -> bool {
+// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if not possible. If
+// `false` is returned, `tree` is unchanged (this is important). This routine relies on the
+// fact that SortedFrags is non-overlapping. They are initially all marked as non-reffy. That
+// may later be changed by calls to `SpillSlotAllocator::notify_spillage_of_reftyped_vlr`.
+fn ssal_add_if_possible(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
    // Check if all the frags will go in.
    if !ssal_is_add_possible(tree, frags) {
        return false;
@@ -120,8 +180,10 @@ fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags)
    // They will. So now insert them.
    for frag in &frags.frags {
        let inserted = tree.insert(
-            frag.clone(),
-            Some(&|frag1, frag2| cmp_range_frags(&frag1, &frag2)),
+            RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false),
+            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                cmp_range_frags(&item1.frag, &item2.frag)
+            }),
        );
        // This can't fail
        assert!(inserted);
@@ -129,6 +191,27 @@ fn ssal_add_if_possible(tree: &mut AVLTree<RangeFrag>, frags: &SortedRangeFrags)
    true
}

+// HELPER FUNCTION
+// Let `frags` be the RangeFrags for some VirtualRange that have already been allocated in
+// `tree`. Mark each such RangeFrag as reffy.
+fn ssal_mark_frags_as_reftyped(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) {
+    for frag in &frags.frags {
+        // Be paranoid. (1) `frag` must already exist in `tree`. (2) it must not be marked as
+        // reffy.
+        let del_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false);
+        let add_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ true);
+        let replaced_ok = tree.find_and_replace(
+            del_this,
+            add_this,
+            &|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                cmp_range_frags(&item1.frag, &item2.frag)
+            },
+        );
+        // This assertion effectively encompasses both (1) and (2) above.
+        assert!(replaced_ok);
+    }
+}
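
The trick that makes this work is that the tree's comparator looks only at the `frag` field, so a refness flip is a find-and-replace of an equal-keyed item. A standalone sketch of that design point, with simplified stand-in types rather than the crate's API:

use std::cmp::Ordering;
use std::collections::BTreeSet;

// Stand-in for RangeFragAndRefness: ordered (and compared) by `frag` only, so
// two items differing only in `is_ref` occupy the same tree position.
#[derive(Clone)]
struct Item { frag: (u32, u32), is_ref: bool }

impl PartialEq for Item {
    fn eq(&self, other: &Self) -> bool { self.frag == other.frag }
}
impl Eq for Item {}
impl Ord for Item {
    fn cmp(&self, other: &Self) -> Ordering { self.frag.cmp(&other.frag) }
}
impl PartialOrd for Item {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
}

// Mark an existing item as reffy: remove-then-insert under the same key,
// mirroring the find_and_replace in `ssal_mark_frags_as_reftyped`.
fn mark_reffy(tree: &mut BTreeSet<Item>, frag: (u32, u32)) -> bool {
    tree.remove(&Item { frag, is_ref: false }) && tree.insert(Item { frag, is_ref: true })
}

fn main() {
    let mut tree = BTreeSet::new();
    tree.insert(Item { frag: (3, 7), is_ref: false });
    assert!(mark_reffy(&mut tree, (3, 7)));
}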

//=============================================================================
// SpillSlotAllocator: public interface

@@ -155,9 +238,11 @@ impl SpillSlotAllocator {
        while self.slots.len() % (req_size as usize) != 0 {
            self.slots.push(LogicalSpillSlot::Unavail);
        }
-        // And now the new slot.
-        let dflt = RangeFrag::invalid_value();
-        let tree = AVLTree::<RangeFrag>::new(dflt);
+        // And now the new slot. The `dflt` value is needed by `AVLTree` to initialise storage
+        // slots for tree nodes, but we will never actually see those values. So it doesn't
+        // matter what they are.
+        let dflt = RangeFragAndRefness::new(RangeFrag::invalid_value(), false);
+        let tree = AVLTree::<RangeFragAndRefness>::new(dflt);
        let res = self.slots.len() as u32;
        self.slots.push(LogicalSpillSlot::InUse {
            size: req_size,
@@ -176,6 +261,7 @@ impl SpillSlotAllocator {
        res
    }

    // THE MAIN FUNCTION
    // Allocate spill slots for all the VirtualRanges in `vlrix`'s eclass,
    // including `vlrix` itself. Since we are allocating spill slots for
    // complete eclasses at once, none of the members of the class should
@@ -191,8 +277,25 @@ impl SpillSlotAllocator {
        vlrEquivClasses: &UnionFindEquivClasses<VirtualRangeIx>,
        vlrix: VirtualRangeIx,
    ) {
+        let is_ref = vlr_env[vlrix].is_ref;
        for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+            // "None of the VLRs in this equivalence class have an allocated spill slot."
+            // This should be true because we allocate spill slots for all of the members of an
+            // eclass at once.
            assert!(vlr_slot_env[cand_vlrix].is_none());

+            // "All of the VLRs in this eclass have the same ref-ness as this VLR."
+            // Why this is true is a bit subtle. The equivalence classes are computed by
+            // `do_coalescing_analysis`, fundamentally by looking at all the move instructions
+            // and computing the transitive closure induced by them. The ref-ness annotations
+            // on each VLR are computed in `do_reftypes_analysis`, and they are also computed
+            // as a transitive closure on the same move instructions. Hence the results should
+            // be identical.
+            //
+            // With all that said, note that these equivalence classes are *not* guaranteed to
+            // be internally non-overlapping. This is explained in the big block comment at the
+            // top of bt_coalescing_analysis.rs.
+            assert!(vlr_env[cand_vlrix].is_ref == is_ref);
        }

        // Do this in two passes. It's a bit cumbersome.
@@ -243,6 +346,12 @@ impl SpillSlotAllocator {
        let req_size = func.get_spillslot_size(vlrix_vreg.get_class(), vlrix_vreg);
        assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);

+        // Sanity check: if the VLR is reftyped, then it must need a 1-word slot
+        // (anything else is nonsensical).
+        if is_ref {
+            assert!(req_size == 1);
+        }

        // Pass 1: find a slot which can take all VirtualRanges in `vlrix`'s
        // eclass when tested individually.
        //
@@ -344,4 +453,70 @@ impl SpillSlotAllocator {
            /*NOTREACHED*/
        } /* 'pass2_per_equiv_class */
    }

+    // STACKMAP SUPPORT
+    // Mark the `frags` for `slot_no` as being reftyped. They are expected to already exist in
+    // the relevant tree, and not currently be marked as reftyped.
+    pub fn notify_spillage_of_reftyped_vlr(
+        &mut self,
+        slot_no: SpillSlot,
+        frags: &SortedRangeFrags,
+    ) {
+        let slot_ix = slot_no.get_usize();
+        assert!(slot_ix < self.slots.len());
+        let slot = &mut self.slots[slot_ix];
+        match slot {
+            LogicalSpillSlot::InUse { size, tree } if *size == 1 => {
+                ssal_mark_frags_as_reftyped(tree, frags)
+            }
+            _ => panic!("SpillSlotAllocator::notify_spillage_of_reftyped_vlr: invalid slot"),
+        }
+    }
+
+    // STACKMAP SUPPORT
+    // Allocate a size-1 (word!) spill slot for `frag` and return it. The slot is marked
+    // reftyped so that a later call to `get_reftyped_spillslots_at_inst_point` will return it.
+    pub fn alloc_reftyped_spillslot_for_frag(&mut self, frag: RangeFrag) -> SpillSlot {
+        for i in 0..self.slots.len() {
+            match &mut self.slots[i] {
+                LogicalSpillSlot::InUse { size: 1, tree } => {
+                    if ssal_is_add_frag_possible(&tree, &frag) {
+                        // We're in luck.
+                        let inserted = tree.insert(
+                            RangeFragAndRefness::new(frag, /*is_ref=*/ true),
+                            Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+                                cmp_range_frags(&item1.frag, &item2.frag)
+                            }),
+                        );
+                        // This can't fail -- we just checked for it!
+                        assert!(inserted);
+                        return SpillSlot::new(i as u32);
+                    }
+                    // Otherwise move on.
+                }
+                LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
+                    // Slot isn't in use, or is in use but for values of some non-ref size.
+                    // Move on.
+                }
+            }
+        }
+        // We tried all slots, but without success. Add a new one and try again. This time we
+        // must succeed. Calling recursively is a bit stupid in the sense that we then search
+        // again to find the slot we just allocated, but hey.
+        self.add_new_slot(1 /*word*/);
+        self.alloc_reftyped_spillslot_for_frag(frag) // \o/ tailcall \o/
+    }
+
+    // STACKMAP SUPPORT
+    // Examine all the spill slots at `pt` and return those that are reftyped. This is
+    // fundamentally what creates a stack map.
+    pub fn get_reftyped_spillslots_at_inst_point(&self, pt: InstPoint) -> Vec<SpillSlot> {
+        let mut res = Vec::<SpillSlot>::new();
+        for (i, slot) in self.slots.iter().enumerate() {
+            if slot.get_refness_at_inst_point(pt) == Some(true) {
+                res.push(SpillSlot::new(i as u32));
+            }
+        }
+        res
+    }
+}
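
Taken together, a stackmap at a safepoint is just the set of slot indices whose occupying value is reffy at that point. A minimal standalone sketch of that scan, with a simplified slot type standing in for LogicalSpillSlot:

// Simplified stand-in: a slot either holds a value over an inclusive range of
// instruction points, with a refness flag, or is empty.
enum Slot {
    InUse { first: u32, last: u32, is_ref: bool },
    Free,
}

// Mirrors `get_reftyped_spillslots_at_inst_point`: collect the indices of all
// slots holding a reffy value that is live at `pt`.
fn stackmap_at(slots: &[Slot], pt: u32) -> Vec<usize> {
    slots
        .iter()
        .enumerate()
        .filter_map(|(i, s)| match s {
            Slot::InUse { first, last, is_ref: true } if *first <= pt && pt <= *last => Some(i),
            _ => None,
        })
        .collect()
}

fn main() {
    let slots = [
        Slot::InUse { first: 0, last: 9, is_ref: true },
        Slot::InUse { first: 0, last: 9, is_ref: false },
        Slot::Free,
    ];
    assert_eq!(stackmap_at(&slots, 5), vec![0]); // only slot 0 is reffy at pt 5
}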

@@ -57,10 +57,9 @@

use crate::analysis_data_flow::get_san_reg_sets_for_insn;
use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, Map, Point, RealReg, RealRegUniverse, Reg, RegSets, SpillSlot,
-    VirtualReg, Writable,
+    BlockIx, InstIx, Map, RealReg, RealRegUniverse, Reg, RegSets, SpillSlot, VirtualReg, Writable,
};
-use crate::inst_stream::InstToInsertAndPoint;
+use crate::inst_stream::{ExtPoint, InstExtPoint, InstToInsertAndExtPoint};
use crate::{Function, RegUsageMapper};

use std::collections::VecDeque;
@@ -478,10 +477,11 @@ impl Checker {
    }
}

-/// A wrapper around `Checker` that assists its use with `InstsAndPoints` and `Function` together.
+/// A wrapper around `Checker` that assists its use with `InstToInsertAndExtPoint`s and
+/// `Function` together.
pub(crate) struct CheckerContext {
    checker: Checker,
-    checker_inst_map: Map<InstPoint, Vec<Inst>>,
+    checker_inst_map: Map<InstExtPoint, Vec<Inst>>,
}

impl CheckerContext {
@@ -490,16 +490,12 @@ impl CheckerContext {
    pub(crate) fn new<F: Function>(
        f: &F,
        ru: &RealRegUniverse,
-        insts_to_add: &Vec<InstToInsertAndPoint>,
+        insts_to_add: &Vec<InstToInsertAndExtPoint>,
    ) -> CheckerContext {
-        let mut checker_inst_map: Map<InstPoint, Vec<Inst>> = Map::default();
-        for &InstToInsertAndPoint {
-            ref inst,
-            ref point,
-        } in insts_to_add
-        {
+        let mut checker_inst_map: Map<InstExtPoint, Vec<Inst>> = Map::default();
+        for &InstToInsertAndExtPoint { ref inst, ref iep } in insts_to_add {
            let checker_insts = checker_inst_map
-                .entry(point.clone())
+                .entry(iep.clone())
                .or_insert_with(|| vec![]);
            checker_insts.push(inst.to_checker_inst());
        }
@@ -521,8 +517,8 @@ impl CheckerContext {
        mapper: &RUM,
    ) -> Result<(), CheckerErrors> {
        let empty = vec![];
-        let pre_point = InstPoint::new(iix, Point::Reload);
-        let post_point = InstPoint::new(iix, Point::Spill);
+        let pre_point = InstExtPoint::new(iix, ExtPoint::Reload);
+        let post_point = InstExtPoint::new(iix, ExtPoint::Spill);

        for checker_inst in self.checker_inst_map.get(&pre_point).unwrap_or(&empty) {
            debug!("at inst {:?}: pre checker_inst: {:?}", iix, checker_inst);

@@ -1194,7 +1194,7 @@ pub struct RealRegUniverse {
    pub regs: Vec<(RealReg, String)>,

    // This is the size of the initial section of `regs` that is available to
-    // the allocator. It must be < `regs`.len().
+    // the allocator. It must be <= `regs`.len().
    pub allocable: usize,

    // Information about groups of allocable registers. Used to quickly address
@@ -1794,6 +1794,22 @@ impl SortedRangeFragIxs {
        res.check(fenv);
        res
    }

+    /// Does this sorted list of range fragments contain the given instruction point?
+    pub fn contains_pt(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>, pt: InstPoint) -> bool {
+        self.frag_ixs
+            .binary_search_by(|&ix| {
+                let frag = &fenv[ix];
+                if pt < frag.first {
+                    Ordering::Greater
+                } else if pt >= frag.first && pt <= frag.last {
+                    Ordering::Equal
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok()
+    }
}
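
Both `contains_pt` methods assume the fragment list is sorted and mutually non-overlapping; under that precondition a `binary_search_by` point query agrees with a linear scan. A small self-contained check of that equivalence (simplified fragment type, not the crate's own):

use std::cmp::Ordering;

#[derive(Clone, Copy)]
struct Frag { first: u32, last: u32 }

// Point-membership query in the style of `contains_pt`: the comparator orders
// each fragment relative to `pt`, so the search lands on any fragment whose
// inclusive range covers `pt`.
fn contains_pt(sorted: &[Frag], pt: u32) -> bool {
    sorted
        .binary_search_by(|frag| {
            if pt < frag.first {
                Ordering::Greater
            } else if pt <= frag.last {
                Ordering::Equal
            } else {
                Ordering::Less
            }
        })
        .is_ok()
}

fn main() {
    let frags = [Frag { first: 2, last: 4 }, Frag { first: 8, last: 8 }];
    // Agreement with the obvious linear scan, given the sortedness precondition.
    for pt in 0..12 {
        let linear = frags.iter().any(|f| f.first <= pt && pt <= f.last);
        assert_eq!(contains_pt(&frags, pt), linear);
    }
}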

//=============================================================================

@@ -1856,6 +1872,21 @@ impl SortedRangeFrags {
            }
        }
    }

+    /// Does this sorted list of range fragments contain the given instruction point?
+    pub fn contains_pt(&self, pt: InstPoint) -> bool {
+        self.frags
+            .binary_search_by(|frag| {
+                if pt < frag.first {
+                    Ordering::Greater
+                } else if pt >= frag.first && pt <= frag.last {
+                    Ordering::Equal
+                } else {
+                    Ordering::Less
+                }
+            })
+            .is_ok()
+    }
}

//=============================================================================

@@ -1997,19 +2028,27 @@ impl SpillCost {
pub struct RealRange {
    pub rreg: RealReg,
    pub sorted_frags: SortedRangeFragIxs,
+    pub is_ref: bool,
}

impl fmt::Debug for RealRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "(RR: {:?}, {:?})", self.rreg, self.sorted_frags)
+        write!(
+            fmt,
+            "(RR: {:?}{}, {:?})",
+            self.rreg,
+            if self.is_ref { " REF" } else { "" },
+            self.sorted_frags
+        )
    }
}

impl RealRange {
    pub fn show_with_rru(&self, univ: &RealRegUniverse) -> String {
        format!(
-            "(RR: {}, {:?})",
+            "(RR: {}{}, {:?})",
            self.rreg.to_reg().show_with_rru(univ),
+            if self.is_ref { " REF" } else { "" },
            self.sorted_frags
        )
    }
@@ -2026,6 +2065,7 @@ pub struct VirtualRange {
    pub vreg: VirtualReg,
    pub rreg: Option<RealReg>,
    pub sorted_frags: SortedRangeFrags,
+    pub is_ref: bool,
    pub size: u16,
    pub total_cost: u32,
    pub spill_cost: SpillCost, // == total_cost / size
@@ -2039,7 +2079,12 @@ impl VirtualRange {

impl fmt::Debug for VirtualRange {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "(VR: {:?},", self.vreg)?;
+        write!(
+            fmt,
+            "(VR: {:?}{},",
+            self.vreg,
+            if self.is_ref { " REF" } else { "" }
+        )?;
        if self.rreg.is_some() {
            write!(fmt, " -> {:?}", self.rreg.unwrap())?;
        }
@@ -2051,6 +2096,109 @@ impl fmt::Debug for VirtualRange {
    }
}

+//=============================================================================
+// Some auxiliary/miscellaneous data structures that are useful.
+
+// Mappings from RealRegs and VirtualRegs to the sets of RealRanges and VirtualRanges that
+// belong to them. These are needed for BT's coalescing analysis and for the dataflow analysis
+// that supports reftype handling.
+
+pub struct RegToRangesMaps {
+    // This maps RealReg indices to the set of RealRangeIxs for that RealReg. Valid indices are
+    // real register indices for all non-sanitised real regs; that is,
+    // 0 .. RealRegUniverse::allocable, for ".." having the Rust meaning. The Vecs of
+    // RealRangeIxs are duplicate-free. They are Vec rather than SmallVec because they are often
+    // large, so SmallVec would just be a disadvantage here.
+    pub rreg_to_rlrs_map: Vec</*real reg ix, */ Vec<RealRangeIx>>,
+
+    // This maps VirtualReg indices to the set of VirtualRangeIxs for that VirtualReg. Valid
+    // indices are 0 .. Function::get_num_vregs(). For functions mostly translated from SSA,
+    // most VirtualRegs will have just one VirtualRange, and there are a lot of VirtualRegs in
+    // general. So SmallVec is a definite benefit here.
+    pub vreg_to_vlrs_map: Vec</*virtual reg ix, */ SmallVec<[VirtualRangeIx; 3]>>,
+}
+
+// MoveInfo holds info about registers connected by moves. For each, we record the source and
+// destination of the move, the insn performing the move, and the estimated execution frequency
+// of the containing block. The moves are not presented in any particular order, but they are
+// duplicate-free in that each such instruction will be listed only once.
+
+pub struct MoveInfoElem {
+    pub dst: Reg,
+    pub dst_range: RangeId, // possibly RangeId::invalid_value() if not requested
+    pub src: Reg,
+    pub src_range: RangeId, // possibly RangeId::invalid_value() if not requested
+    pub iix: InstIx,
+    pub est_freq: u32,
+}

+pub struct MoveInfo {
+    pub moves: Vec<MoveInfoElem>,
+}
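
For orientation, here is an illustrative construction of that move-info shape; the `Reg`, `RangeId`, and `InstIx` stand-ins below are simplified newtypes, not the crate's real types:

// Shape of the per-move record collected for coalescing and reftype analysis.
#[derive(Clone, Copy, Debug)]
struct Reg(u32);
#[derive(Clone, Copy, Debug)]
struct RangeId(u32);
#[derive(Clone, Copy, Debug)]
struct InstIx(u32);

struct MoveInfoElem {
    dst: Reg,
    dst_range: RangeId,
    src: Reg,
    src_range: RangeId,
    iix: InstIx,
    est_freq: u32,
}

struct MoveInfo {
    moves: Vec<MoveInfoElem>,
}

fn main() {
    // One move `v2 := v1` at instruction 7, in a block with estimated
    // execution frequency 4; range ids not requested, so a sentinel is used.
    let invalid = RangeId(0xFFFF_FFFF);
    let mi = MoveInfo {
        moves: vec![MoveInfoElem {
            dst: Reg(2), dst_range: invalid,
            src: Reg(1), src_range: invalid,
            iix: InstIx(7), est_freq: 4,
        }],
    };
    assert_eq!(mi.moves.len(), 1);
}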

+// Something that can be either a VirtualRangeIx or a RealRangeIx, whilst still being 32 bits
+// (by stealing one bit from those spaces). Note that the resulting thing no longer denotes a
+// contiguous index space, and so it has a name that indicates it is an identifier rather than
+// an index.
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub struct RangeId {
+    // 1 X--(31)--X is a RealRangeIx with value X--(31)--X
+    // 0 X--(31)--X is a VirtualRangeIx with value X--(31)--X
+    bits: u32,
+}
+
+impl RangeId {
+    #[inline(always)]
+    pub fn new_real(rlrix: RealRangeIx) -> Self {
+        let n = rlrix.get();
+        assert!(n <= 0x7FFF_FFFF);
+        Self {
+            bits: n | 0x8000_0000,
+        }
+    }
+    #[inline(always)]
+    pub fn new_virtual(vlrix: VirtualRangeIx) -> Self {
+        let n = vlrix.get();
+        assert!(n <= 0x7FFF_FFFF);
+        Self { bits: n }
+    }
+    #[inline(always)]
+    pub fn is_real(self) -> bool {
+        self.bits & 0x8000_0000 != 0
+    }
+    #[allow(dead_code)]
+    #[inline(always)]
+    pub fn is_virtual(self) -> bool {
+        self.bits & 0x8000_0000 == 0
+    }
+    #[inline(always)]
+    pub fn to_real(self) -> RealRangeIx {
+        assert!(self.bits & 0x8000_0000 != 0);
+        RealRangeIx::new(self.bits & 0x7FFF_FFFF)
+    }
+    #[inline(always)]
+    pub fn to_virtual(self) -> VirtualRangeIx {
+        assert!(self.bits & 0x8000_0000 == 0);
+        VirtualRangeIx::new(self.bits)
+    }
+    #[inline(always)]
+    pub fn invalid_value() -> Self {
+        // Real, and implausibly huge
+        Self { bits: 0xFFFF_FFFF }
+    }
+}
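
The tag-bit encoding is easy to sanity-check in isolation. A standalone sketch with plain u32 indices in place of RealRangeIx/VirtualRangeIx:

// Standalone model of RangeId's bit-stealing scheme: the top bit tags the
// namespace, the low 31 bits carry the index.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Id { bits: u32 }

impl Id {
    fn new_real(n: u32) -> Self {
        assert!(n <= 0x7FFF_FFFF);
        Id { bits: n | 0x8000_0000 }
    }
    fn new_virtual(n: u32) -> Self {
        assert!(n <= 0x7FFF_FFFF);
        Id { bits: n }
    }
    fn is_real(self) -> bool { self.bits & 0x8000_0000 != 0 }
    fn to_real(self) -> u32 {
        assert!(self.is_real());
        self.bits & 0x7FFF_FFFF
    }
    fn to_virtual(self) -> u32 {
        assert!(!self.is_real());
        self.bits
    }
}

fn main() {
    // Round trips preserve the index, and the two namespaces never collide.
    assert_eq!(Id::new_real(42).to_real(), 42);
    assert_eq!(Id::new_virtual(42).to_virtual(), 42);
    assert_ne!(Id::new_real(42), Id::new_virtual(42));
}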

+impl fmt::Debug for RangeId {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_real() {
+            self.to_real().fmt(fmt)
+        } else {
+            self.to_virtual().fmt(fmt)
+        }
+    }
+}

//=============================================================================
// Test cases

@@ -1,7 +1,7 @@
use crate::checker::Inst as CheckerInst;
use crate::checker::{CheckerContext, CheckerErrors};
use crate::data_structures::{
-    BlockIx, InstIx, InstPoint, RangeFrag, RealReg, RealRegUniverse, SpillSlot, TypedIxVec,
+    BlockIx, InstIx, InstPoint, Point, RangeFrag, RealReg, RealRegUniverse, SpillSlot, TypedIxVec,
    VirtualReg, Writable,
};
use crate::{reg_maps::VrangeRegUsageMapper, Function, RegAllocError};
@@ -17,12 +17,12 @@ pub(crate) enum InstToInsert {
    Spill {
        to_slot: SpillSlot,
        from_reg: RealReg,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    },
    Reload {
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    },
    Move {
        to_reg: Writable<RealReg>,
@@ -76,14 +76,112 @@ impl InstToInsert {
    }
}

-pub(crate) struct InstToInsertAndPoint {
-    pub(crate) inst: InstToInsert,
-    pub(crate) point: InstPoint,
+// ExtPoint is an extended version of Point. It plays no role in dataflow analysis or in the
+// specification of live ranges. It exists only to describe where to place the "extra"
+// spill/reload instructions required to make stackmap/reftype support work. If there were no
+// need to support stackmaps/reftypes, ExtPoint would not be needed, and Point would be
+// adequate.
+//
+// Recall that Point can denote 4 places within an instruction, with R < U < D < S:
+//
+// * R(eload): this is where any reload insns for the insn itself are
+//   considered to live.
+//
+// * U(se): this is where the insn is considered to use values from those of
+//   its register operands that appear in a Read or Modify role.
+//
+// * D(ef): this is where the insn is considered to define new values for
+//   those of its register operands that appear in a Write or Modify role.
+//
+// * S(pill): this is where any spill insns for the insn itself are considered
+//   to live.
+//
+// ExtPoint extends that to six places, by adding a new point in between Reload and Use, and one
+// between Def and Spill, giving: R < SB < U < D < RA < S:
+//
+// * (R)eload: unchanged
+//
+// * SB (Spill before): at this point, reftyped regs will be spilled, if this insn is a safepoint
+//
+// * (U)se: unchanged
+//
+// * (D)ef: unchanged
+//
+// * RA (Reload after): at this point, reftyped regs spilled at SB will be reloaded, if needed,
+//   and if this insn is a safepoint
+//
+// * (S)pill: unchanged
+//
+// From this it can be seen that the SB and RA points are closest to the instruction "core" --
+// the U and D points. SB and RA describe places where reftyped regs must be spilled/reloaded
+// around the core. Because the SB-RA range falls inside the R-S range, it means that the
+// safepoint spill/reload instructions can be added after "normal" spill/reload instructions
+// have been created, and it doesn't interact with the logic to create those "normal"
+// spill/reload instructions.
+//
+// In the worst case scenario, a value could be reloaded at R, immediately spilled at SB, then
+// possibly modified in memory at the safepoint proper, reloaded at RA, and spilled at S. That
+// is considered to be an unlikely scenario, though.
+
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum ExtPoint {
+    Reload = 0,
+    SpillBefore = 1,
+    Use = 2,
+    Def = 3,
+    ReloadAfter = 4,
+    Spill = 5,
+}

-impl InstToInsertAndPoint {
-    pub(crate) fn new(inst: InstToInsert, point: InstPoint) -> Self {
-        Self { inst, point }
+impl ExtPoint {
+    // Promote a Point to an ExtPoint
+    #[inline(always)]
+    pub fn from_point(pt: Point) -> Self {
+        match pt {
+            Point::Reload => ExtPoint::Reload,
+            Point::Use => ExtPoint::Use,
+            Point::Def => ExtPoint::Def,
+            Point::Spill => ExtPoint::Spill,
+        }
    }
}

+// As the direct analogy to InstPoint, an InstExtPoint pairs an InstIx with an ExtPoint. In
+// contrast to InstPoint, these aren't so performance critical, so there's no fancy bit-packed
+// representation as there is for InstPoint.
+
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct InstExtPoint {
+    pub iix: InstIx,
+    pub extpt: ExtPoint,
+}
+
+impl InstExtPoint {
+    #[inline(always)]
+    pub fn new(iix: InstIx, extpt: ExtPoint) -> Self {
+        Self { iix, extpt }
+    }
+    // Promote an InstPoint to an InstExtPoint
+    #[inline(always)]
+    pub fn from_inst_point(inst_pt: InstPoint) -> Self {
+        InstExtPoint {
+            iix: inst_pt.iix(),
+            extpt: ExtPoint::from_point(inst_pt.pt()),
+        }
+    }
+}
+
+// So, finally, we can specify what we want: an instruction to insert, and a place to insert it.
+
+pub(crate) struct InstToInsertAndExtPoint {
+    pub(crate) inst: InstToInsert,
+    pub(crate) iep: InstExtPoint,
+}
+
+impl InstToInsertAndExtPoint {
+    #[inline(always)]
+    pub(crate) fn new(inst: InstToInsert, iep: InstExtPoint) -> Self {
+        Self { inst, iep }
+    }
+}

@@ -96,7 +194,7 @@ impl InstToInsertAndPoint {
fn map_vregs_to_rregs<F: Function>(
    func: &mut F,
    frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
-    insts_to_add: &Vec<InstToInsertAndPoint>,
+    insts_to_add: &Vec<InstToInsertAndExtPoint>,
    iixs_to_nop_out: &Vec<InstIx>,
    reg_universe: &RealRegUniverse,
    use_checker: bool,
@@ -391,12 +489,14 @@ fn map_vregs_to_rregs<F: Function>(
#[inline(never)]
pub(crate) fn add_spills_reloads_and_moves<F: Function>(
    func: &mut F,
-    mut insts_to_add: Vec<InstToInsertAndPoint>,
+    safepoint_insns: &Vec<InstIx>,
+    mut insts_to_add: Vec<InstToInsertAndExtPoint>,
) -> Result<
    (
        Vec<F::Inst>,
        TypedIxVec<BlockIx, InstIx>,
        TypedIxVec<InstIx, InstIx>,
+        Vec<InstIx>,
    ),
    String,
> {
@@ -407,20 +507,31 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
    // We also need to examine and update Func::blocks. This is assumed to
    // be arranged in ascending order of the Block::start fields.
    //
+    // Also, if the client requested stackmap creation, then `safepoint_insns` will be
+    // non-empty, and we will have to return a vector of the same length that indicates the
+    // location of each safepoint insn in the final code. `safepoint_insns` is assumed to be
+    // sorted in ascending order and duplicate-free.
+    //
    // Linear scan relies on the sort being stable here, so make sure to not
    // use an unstable sort. See the comment in `resolve_moves_across blocks`
    // in linear scan's code.

-    insts_to_add.sort_by_key(|mem_move| mem_move.point);
+    insts_to_add.sort_by_key(|to_add| to_add.iep.clone());

    let mut cur_inst_to_add = 0;
    let mut cur_block = BlockIx::new(0);

    let mut insns: Vec<F::Inst> = vec![];
    let mut target_map: TypedIxVec<BlockIx, InstIx> = TypedIxVec::new();
-    let mut orig_insn_map: TypedIxVec<InstIx, InstIx> = TypedIxVec::new();
+    let mut new_to_old_insn_map: TypedIxVec<InstIx, InstIx> = TypedIxVec::new();
    target_map.reserve(func.blocks().len());
-    orig_insn_map.reserve(func.insn_indices().len() + insts_to_add.len());
+    new_to_old_insn_map.reserve(func.insn_indices().len() + insts_to_add.len());

+    // Index in `safepoint_insns` of the next safepoint insn we will encounter
+    let mut next_safepoint_insn_index = 0;
+    let mut new_safepoint_insns = Vec::<InstIx>::new();
+    new_safepoint_insns.reserve(safepoint_insns.len());

    for iix in func.insn_indices() {
        // Is `iix` the first instruction in a block? Meaning, are we
@@ -431,27 +542,33 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
            target_map.push(InstIx::new(insns.len() as u32));
        }

-        // Copy to the output vector, the extra insts that are to be placed at the
-        // reload point of `iix`.
+        // Copy to the output vector, first, the extra insts that are to be placed at the
+        // reload point of `iix`, and then the extras for the spill-before point of `iix`.
        while cur_inst_to_add < insts_to_add.len()
-            && insts_to_add[cur_inst_to_add].point == InstPoint::new_reload(iix)
+            && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::SpillBefore)
        {
            insns.push(insts_to_add[cur_inst_to_add].inst.construct(func));
-            orig_insn_map.push(InstIx::invalid_value());
+            new_to_old_insn_map.push(InstIx::invalid_value());
            cur_inst_to_add += 1;
        }

        // Copy the inst at `iix` itself
-        orig_insn_map.push(iix);
+        if next_safepoint_insn_index < safepoint_insns.len()
+            && iix == safepoint_insns[next_safepoint_insn_index]
+        {
+            new_safepoint_insns.push(InstIx::new(insns.len() as u32));
+            next_safepoint_insn_index += 1;
+        }
+        new_to_old_insn_map.push(iix);
        insns.push(func.get_insn(iix).clone());

-        // And copy the extra insts that are to be placed at the spill point of
-        // `iix`.
+        // And copy, first, the extra insts that are to be placed at the reload-after point
+        // of `iix`, followed by those to be placed at the spill point of `iix`.
        while cur_inst_to_add < insts_to_add.len()
-            && insts_to_add[cur_inst_to_add].point == InstPoint::new_spill(iix)
+            && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::Spill)
        {
            insns.push(insts_to_add[cur_inst_to_add].inst.construct(func));
-            orig_insn_map.push(InstIx::invalid_value());
+            new_to_old_insn_map.push(InstIx::invalid_value());
            cur_inst_to_add += 1;
        }

@@ -464,8 +581,10 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(

    debug_assert!(cur_inst_to_add == insts_to_add.len());
    debug_assert!(cur_block.get() == func.blocks().len() as u32);
+    debug_assert!(next_safepoint_insn_index == safepoint_insns.len());
+    debug_assert!(new_safepoint_insns.len() == safepoint_insns.len());

-    Ok((insns, target_map, orig_insn_map))
+    Ok((insns, target_map, new_to_old_insn_map, new_safepoint_insns))
}

//=============================================================================
@@ -474,7 +593,8 @@ pub(crate) fn add_spills_reloads_and_moves<F: Function>(
#[inline(never)]
pub(crate) fn edit_inst_stream<F: Function>(
    func: &mut F,
-    insts_to_add: Vec<InstToInsertAndPoint>,
+    safepoint_insns: &Vec<InstIx>,
+    insts_to_add: Vec<InstToInsertAndExtPoint>,
    iixs_to_nop_out: &Vec<InstIx>,
    frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
    reg_universe: &RealRegUniverse,
@@ -484,6 +604,7 @@ pub(crate) fn edit_inst_stream<F: Function>(
        Vec<F::Inst>,
        TypedIxVec<BlockIx, InstIx>,
        TypedIxVec<InstIx, InstIx>,
+        Vec<InstIx>,
    ),
    RegAllocError,
> {
@@ -496,5 +617,6 @@ pub(crate) fn edit_inst_stream<F: Function>(
        use_checker,
    )
    .map_err(|e| RegAllocError::RegChecker(e))?;
-    add_spills_reloads_and_moves(func, insts_to_add).map_err(|e| RegAllocError::Other(e))
+    add_spills_reloads_and_moves(func, safepoint_insns, insts_to_add)
+        .map_err(|e| RegAllocError::Other(e))
}

@@ -15,6 +15,7 @@ mod analysis_main;

mod analysis_control_flow;
mod analysis_data_flow;
+mod analysis_reftypes;
mod avl_tree;
mod bt_coalescing_analysis;
mod bt_commitment_map;
@@ -266,21 +267,26 @@ pub trait Function {

    /// Generate a spill instruction for insertion into the instruction
    /// sequence. The associated virtual register (whose value is being spilled)
-    /// is passed so that the client may make decisions about the instruction to
-    /// generate based on the type of value in question. Because the register
-    /// allocator will insert spill instructions at arbitrary points, the
-    /// returned instruction here must not modify the machine's condition codes.
-    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, for_vreg: VirtualReg) -> Self::Inst;
+    /// is passed, if it exists, so that the client may make decisions about the
+    /// instruction to generate based on the type of value in question. Because
+    /// the register allocator will insert spill instructions at arbitrary points,
+    /// the returned instruction here must not modify the machine's condition codes.
+    fn gen_spill(
+        &self,
+        to_slot: SpillSlot,
+        from_reg: RealReg,
+        for_vreg: Option<VirtualReg>,
+    ) -> Self::Inst;

    /// Generate a reload instruction for insertion into the instruction
    /// sequence. The associated virtual register (whose value is being loaded)
-    /// is passed as well. The returned instruction must not modify the
-    /// machine's condition codes.
+    /// is passed as well, if it exists. The returned instruction must not modify
+    /// the machine's condition codes.
    fn gen_reload(
        &self,
        to_reg: Writable<RealReg>,
        from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst;

    /// Generate a register-to-register move for insertion into the instruction
@@ -367,6 +373,14 @@ pub struct RegAllocResult<F: Function> {
    /// call to `allocate_registers`. Creating these annotations is
    /// potentially expensive, so don't request them if you don't need them.
    pub block_annotations: Option<TypedIxVec<BlockIx, Vec<String>>>,

+    /// If stackmap support was requested: one stackmap for each of the safepoint instructions
+    /// declared. Otherwise empty.
+    pub stackmaps: Vec<Vec<SpillSlot>>,
+
+    /// If stackmap support was requested: one InstIx for each safepoint instruction declared,
+    /// indicating the corresponding location in the final instruction stream. Otherwise empty.
+    pub new_safepoint_insns: Vec<InstIx>,
}

/// A choice of register allocation algorithm to run.
@@ -444,16 +458,36 @@ impl fmt::Debug for Options {
    }
}

+/// A structure with which callers can request stackmap information.
+pub struct StackmapRequestInfo {
+    /// The register class that holds reftypes. This may only be RegClass::I32 or
+    /// RegClass::I64, and it must equal the word size of the target architecture.
+    pub reftype_class: RegClass,
+
+    /// The virtual regs that hold reftyped values. These must be provided in ascending order
+    /// of register index and be duplicate-free. They must have class `reftype_class`.
+    pub reftyped_vregs: Vec<VirtualReg>,
+
+    /// The indices of instructions for which the allocator will construct stackmaps. These
+    /// must be provided in ascending order and be duplicate-free. The specified instructions
+    /// may not be coalescable move instructions (as the allocator may remove those) and they
+    /// may not modify any register carrying a reftyped value (they may "def" or "use" them,
+    /// though). The reason is that, at a safepoint, the client's garbage collector may change
+    /// the values of all live references, so it would be meaningless for a safepoint
+    /// instruction also to attempt to do that -- we'd end up with two competing new values.
+    pub safepoint_insns: Vec<InstIx>,
+}
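
A request, then, is just these three fields, with the ordering and duplicate-freedom invariants maintained by the caller. An illustrative construction with u32 stand-ins for the crate's `VirtualReg` and `InstIx` index types:

// Shape of a stackmap request; the allocator-side checks reject
// non-ascending or duplicate entries, so we assert the same invariants here.
struct StackmapRequestInfo {
    reftype_class: &'static str, // stand-in for RegClass::I64 / RegClass::I32
    reftyped_vregs: Vec<u32>,    // ascending, duplicate-free
    safepoint_insns: Vec<u32>,   // ascending, duplicate-free, no move insns
}

fn is_strictly_ascending(xs: &[u32]) -> bool {
    xs.windows(2).all(|w| w[0] < w[1])
}

fn main() {
    let req = StackmapRequestInfo {
        reftype_class: "I64",
        reftyped_vregs: vec![0, 3, 4],
        safepoint_insns: vec![7, 19],
    };
    // The same invariants the allocator validates up front.
    assert!(is_strictly_ascending(&req.reftyped_vregs));
    assert!(is_strictly_ascending(&req.safepoint_insns));
}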

/// Allocate registers for a function's code, given a universe of real registers that we are
-/// allowed to use.
+/// allowed to use. Optionally, stackmap support may be requested.
///
/// The control flow graph must not contain any critical edges, that is, any edge coming from a
/// block with multiple successors must not flow into a block with multiple predecessors. The
/// embedder must have split critical edges before handing over the function to this function.
/// Otherwise, an error will be returned.
///
-/// Allocate may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
-/// fail, returning an error.
+/// Allocation may succeed, returning a `RegAllocResult` with the new instruction sequence, or
+/// it may fail, returning an error.
///
/// Runtime options can be passed to the allocators, through the use of [Options] for options
/// common to all the backends. The choice of algorithm is done by passing a given [Algorithm]
@@ -462,6 +496,7 @@ impl fmt::Debug for Options {
pub fn allocate_registers_with_opts<F: Function>(
    func: &mut F,
    rreg_universe: &RealRegUniverse,
+    stackmap_info: Option<&StackmapRequestInfo>,
    opts: Options,
) -> Result<RegAllocResult<F>, RegAllocError> {
    info!("");
@@ -474,10 +509,69 @@ pub fn allocate_registers_with_opts<F: Function>(
            info!("  {}", s);
        }
    }
+    // If stackmap support has been requested, perform some initial sanity checks.
+    if let Some(&StackmapRequestInfo {
+        reftype_class,
+        ref reftyped_vregs,
+        ref safepoint_insns,
+    }) = stackmap_info
+    {
+        if let Algorithm::LinearScan(_) = opts.algorithm {
+            return Err(RegAllocError::Other(
+                "stackmap request: not currently available for Linear Scan".to_string(),
+            ));
+        }
+        if reftype_class != RegClass::I64 && reftype_class != RegClass::I32 {
+            return Err(RegAllocError::Other(
+                "stackmap request: invalid reftype_class".to_string(),
+            ));
+        }
+        let num_avail_vregs = func.get_num_vregs();
+        for i in 0..reftyped_vregs.len() {
+            let vreg = &reftyped_vregs[i];
+            if vreg.get_class() != reftype_class {
+                return Err(RegAllocError::Other(
+                    "stackmap request: invalid vreg class".to_string(),
+                ));
+            }
+            if vreg.get_index() >= num_avail_vregs {
+                return Err(RegAllocError::Other(
+                    "stackmap request: out of range vreg".to_string(),
+                ));
+            }
+            if i > 0 && reftyped_vregs[i - 1].get_index() >= vreg.get_index() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: non-ascending vregs".to_string(),
+                ));
+            }
+        }
+        let num_avail_insns = func.insns().len();
+        for i in 0..safepoint_insns.len() {
+            let safepoint_iix = safepoint_insns[i];
+            if safepoint_iix.get() as usize >= num_avail_insns {
+                return Err(RegAllocError::Other(
+                    "stackmap request: out of range safepoint insn".to_string(),
+                ));
+            }
+            if i > 0 && safepoint_insns[i - 1].get() >= safepoint_iix.get() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: non-ascending safepoint insns".to_string(),
+                ));
+            }
+            if func.is_move(func.get_insn(safepoint_iix)).is_some() {
+                return Err(RegAllocError::Other(
+                    "stackmap request: safepoint insn is a move insn".to_string(),
+                ));
+            }
+        }
+        // We can't check here that reftyped regs are not changed by safepoint insns. That is
+        // done deep in the stackmap creation logic, for BT in `get_stackmap_artefacts_at`.
+    }

    let run_checker = opts.run_checker;
    let res = match &opts.algorithm {
        Algorithm::Backtracking(opts) => {
-            bt_main::alloc_main(func, rreg_universe, run_checker, opts)
+            bt_main::alloc_main(func, rreg_universe, stackmap_info, run_checker, opts)
        }
        Algorithm::LinearScan(opts) => linear_scan::run(func, rreg_universe, run_checker, opts),
    };
@@ -502,6 +596,7 @@ pub fn allocate_registers_with_opts<F: Function>(
pub fn allocate_registers<F: Function>(
    func: &mut F,
    rreg_universe: &RealRegUniverse,
+    stackmap_info: Option<&StackmapRequestInfo>,
    algorithm: AlgorithmWithDefaults,
) -> Result<RegAllocResult<F>, RegAllocError> {
    let algorithm = match algorithm {
@@ -512,7 +607,7 @@ pub fn allocate_registers<F: Function>(
        algorithm,
        ..Default::default()
    };
-    allocate_registers_with_opts(func, rreg_universe, opts)
+    allocate_registers_with_opts(func, rreg_universe, stackmap_info, opts)
}

// Facilities to snapshot regalloc inputs and reproduce them in regalloc.rs.

@@ -11,7 +11,7 @@ use std::env;
use std::fmt;

use crate::data_structures::{BlockIx, InstIx, InstPoint, Point, RealReg, RegVecsAndBounds};
-use crate::inst_stream::{add_spills_reloads_and_moves, InstToInsertAndPoint};
+use crate::inst_stream::{add_spills_reloads_and_moves, InstToInsertAndExtPoint};
use crate::{
    checker::CheckerContext, reg_maps::MentionRegUsageMapper, Function, RealRegUniverse,
    RegAllocError, RegAllocResult, RegClass, Set, SpillSlot, VirtualReg, NUM_REG_CLASSES,
@@ -625,7 +625,7 @@ fn set_registers<F: Function>(
    virtual_intervals: &Vec<VirtualInterval>,
    reg_universe: &RealRegUniverse,
    use_checker: bool,
-    memory_moves: &Vec<InstToInsertAndPoint>,
+    memory_moves: &Vec<InstToInsertAndExtPoint>,
) -> Set<RealReg> {
    info!("set_registers");

@@ -751,7 +751,7 @@ fn set_registers<F: Function>(
fn apply_registers<F: Function>(
    func: &mut F,
    virtual_intervals: &Vec<VirtualInterval>,
-    memory_moves: Vec<InstToInsertAndPoint>,
+    memory_moves: Vec<InstToInsertAndExtPoint>,
    reg_universe: &RealRegUniverse,
    num_spill_slots: u32,
    use_checker: bool,
@@ -766,8 +766,11 @@ fn apply_registers<F: Function>(
        &memory_moves,
    );

-    let (final_insns, target_map, orig_insn_map) =
-        add_spills_reloads_and_moves(func, memory_moves).map_err(|e| RegAllocError::Other(e))?;
+    let safepoint_insns = vec![];
+    let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
+        add_spills_reloads_and_moves(func, &safepoint_insns, memory_moves)
+            .map_err(|e| RegAllocError::Other(e))?;
+    assert!(new_safepoint_insns.is_empty()); // because `safepoint_insns` is also empty.

    // And now remove from the clobbered registers set, all those not available to the allocator.
    // But not removing the reserved regs, since we might have modified those.
@@ -782,9 +785,11 @@ fn apply_registers<F: Function>(
    Ok(RegAllocResult {
        insns: final_insns,
        target_map,
-        orig_insn_map,
+        orig_insn_map: new_to_old_insn_map,
        clobbered_registers,
        num_spill_slots,
        block_annotations: None,
+        stackmaps: vec![],
+        new_safepoint_insns,
    })
}

@@ -1,7 +1,7 @@
use super::{next_use, IntId, Location, RegUses, VirtualInterval};
use crate::{
    data_structures::{BlockIx, InstPoint, Point},
-    inst_stream::{InstToInsert, InstToInsertAndPoint},
+    inst_stream::{InstExtPoint, InstToInsert, InstToInsertAndExtPoint},
    sparse_set::SparseSet,
    Function, RealReg, Reg, SpillSlot, TypedIxVec, VirtualReg, Writable,
};
@@ -17,7 +17,7 @@ fn resolve_moves_in_block<F: Function>(
    reg_uses: &RegUses,
    scratches_by_rc: &[Option<RealReg>],
    spill_slot: &mut u32,
-    moves_in_blocks: &mut Vec<InstToInsertAndPoint>,
+    moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
    tmp_ordered_moves: &mut Vec<MoveOp>,
    tmp_stack: &mut Vec<MoveOp>,
) {
@@ -132,13 +132,13 @@ fn resolve_moves_in_block<F: Function>(
                "inblock fixup: {:?} spill {:?} -> {:?} at {:?}",
                interval.id, rreg, spill, at_inst
            );
-            spills_at_inst.push(InstToInsertAndPoint::new(
+            spills_at_inst.push(InstToInsertAndExtPoint::new(
                InstToInsert::Spill {
                    to_slot: spill,
                    from_reg: rreg,
-                    for_vreg: vreg,
+                    for_vreg: Some(vreg),
                },
-                at_inst,
+                InstExtPoint::from_inst_point(at_inst),
            ));
        }

@@ -324,8 +324,8 @@ fn resolve_moves_across_blocks<F: Function>(
    intervals: &Vec<VirtualInterval>,
    scratches_by_rc: &[Option<RealReg>],
    spill_slot: &mut u32,
-    moves_at_block_starts: &mut Vec<InstToInsertAndPoint>,
-    moves_at_block_ends: &mut Vec<InstToInsertAndPoint>,
+    moves_at_block_starts: &mut Vec<InstToInsertAndExtPoint>,
+    moves_at_block_ends: &mut Vec<InstToInsertAndExtPoint>,
    tmp_ordered_moves: &mut Vec<MoveOp>,
    tmp_stack: &mut Vec<MoveOp>,
) {
@@ -500,7 +500,7 @@ pub(crate) fn run<F: Function>(
    liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
    spill_slot: &mut u32,
    scratches_by_rc: &[Option<RealReg>],
-) -> Vec<InstToInsertAndPoint> {
+) -> Vec<InstToInsertAndExtPoint> {
    info!("resolve_moves");

    // Keep three lists of moves to insert:
@@ -624,14 +624,14 @@ impl MoveOp {
            MoveOperand::Stack(to) => InstToInsert::Spill {
                to_slot: to,
                from_reg: from,
-                for_vreg: self.vreg,
+                for_vreg: Some(self.vreg),
            },
        },
        MoveOperand::Stack(from) => match self.to {
            MoveOperand::Reg(to) => InstToInsert::Reload {
                to_reg: Writable::from_reg(to),
                from_slot: from,
-                for_vreg: self.vreg,
+                for_vreg: Some(self.vreg),
            },
            MoveOperand::Stack(_to) => unreachable!("stack to stack move"),
        },
@@ -749,7 +749,7 @@ fn emit_moves(
    ordered_moves: &Vec<MoveOp>,
    num_spill_slots: &mut u32,
    scratches_by_rc: &[Option<RealReg>],
-    moves_in_blocks: &mut Vec<InstToInsertAndPoint>,
+    moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
) {
    let mut spill_slot = None;
    let mut in_cycle = false;
@@ -770,9 +770,12 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(dst_reg),
                        from_slot: spill_slot.expect("should have a cycle spill slot"),
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "finishing cycle: {:?} -> {:?}",
                        spill_slot.unwrap(),
@@ -785,15 +788,21 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(scratch),
                        from_slot: spill_slot.expect("should have a cycle spill slot"),
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    let inst = InstToInsert::Spill {
                        to_slot: dst_spill,
                        from_reg: scratch,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "finishing cycle: {:?} -> {:?} -> {:?}",
                        spill_slot.unwrap(),
@@ -828,9 +837,12 @@ fn emit_moves(
                    let inst = InstToInsert::Spill {
                        to_slot: spill_slot.unwrap(),
                        from_reg: src_reg,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!("starting cycle: {:?} -> {:?}", src_reg, spill_slot.unwrap());
                }
                MoveOperand::Stack(src_spill) => {
@@ -839,15 +851,21 @@ fn emit_moves(
                    let inst = InstToInsert::Reload {
                        to_reg: Writable::from_reg(scratch),
                        from_slot: src_spill,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    let inst = InstToInsert::Spill {
                        to_slot: spill_slot.expect("should have a cycle spill slot"),
                        from_reg: scratch,
-                        for_vreg: mov.vreg,
+                        for_vreg: Some(mov.vreg),
                    };
-                    moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+                    moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                        inst,
+                        InstExtPoint::from_inst_point(at_inst),
+                    ));
                    trace!(
                        "starting cycle: {:?} -> {:?} -> {:?}",
                        src_spill,
@@ -862,7 +880,10 @@ fn emit_moves(

            // A normal move which is not part of a cycle.
            let inst = mov.gen_inst();
-            moves_in_blocks.push(InstToInsertAndPoint::new(inst, at_inst));
+            moves_in_blocks.push(InstToInsertAndExtPoint::new(
+                inst,
+                InstExtPoint::from_inst_point(at_inst),
+            ));
            trace!("moving {:?} -> {:?}", mov.from, mov.to);
        }
    }

@@ -17,8 +17,8 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
enum IRInstKind {
-    Spill { vreg: VirtualReg },
-    Reload { vreg: VirtualReg },
+    Spill { vreg: Option<VirtualReg> },
+    Reload { vreg: Option<VirtualReg> },
    Move { vreg: VirtualReg },
    ZeroLenNop,
    UserReturn,
@@ -158,7 +158,12 @@ impl IRSnapshot {
    }

    pub fn allocate(&mut self, opts: Options) -> Result<RegAllocResult<IRFunction>, RegAllocError> {
-        allocate_registers_with_opts(&mut self.func, &self.reg_universe, opts)
+        allocate_registers_with_opts(
+            &mut self.func,
+            &self.reg_universe,
+            None, /*no stackmap request*/
+            opts,
+        )
    }
}

@@ -253,7 +258,7 @@ impl Function for IRFunction {
        &self,
        _to_slot: SpillSlot,
        from_reg: RealReg,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst {
        IRInst {
            reg_uses: vec![from_reg.to_reg()],
@@ -266,7 +271,7 @@ impl Function for IRFunction {
        &self,
        to_reg: Writable<RealReg>,
        _from_slot: SpillSlot,
-        for_vreg: VirtualReg,
+        for_vreg: Option<VirtualReg>,
    ) -> Self::Inst {
        IRInst {
            reg_uses: vec![],

@@ -1 +1 @@
{"files":{"Cargo.lock":"66295ad9f17449e9ef5c16b64c9f0fca138ff07e31fb182bdd134099a7d049b4","Cargo.toml":"ddff8c2657f4fd0f83ce3b732cea03b8eb1f434fdce886fba2904cee5b0090d5","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"a50793192bdc1729a786bb456e5ad1e567c7f4b6a0a13ab0e46754e965978e8f","compare-with-main.sh":"2ddfab71ba571055292a29a36c1ede05f64ba094c78495b945d1486bf32ab6d7","examples/dump.rs":"a5944669754d1093c048a3b2e959c8c22e485a8069582d532172a8207e54dce6","examples/simple.rs":"0bbf762ca214815d81a915106efca05a9fa642a7a250c704c188258ec15d2629","src/binary_reader.rs":"d209e8cf15db30cb06e4c23980de775a59db8654aeb7a69bbe432c09f5046f76","src/lib.rs":"62c4b60aae7b7c5018caf68da31f929956f188042fa0715781e6879148f79db1","src/limits.rs":"22649a707c3f894d0381d745f744876a106cacb72d8a9a608cfa7a6f3f1e5631","src/module_resources.rs":"3a2137adb9018a5d5ebcaf274f969e650e184f77e5db62cd9b655cc6e97fdee1","src/operators_validator.rs":"4d98039d738be26670f7fb399e0738dde6caa170c09139badb62190795c78593","src/parser.rs":"061ba728cbf044456c088255c4c633d5bcc630fe9035a21168f068e498e8255c","src/primitives.rs":"c5056a6f6f444cdd4b45d2b7bb332b776088d7b5bc323e3daddeb48110025b25","src/readers/alias_section.rs":"fa64491870d97577bad7f1891aab1be7597a940bc0e2ccfef0c84847e9df3d6d","src/readers/code_section.rs":"bfdd8d5f08ef357679d7bfe6f9735ff4f08925361e0771a6b1b5112a12c62f30","src/readers/data_count_section.rs":"e711720f8205a906794dc7020a656a2ae74e1d9c3823fcdcdbd9d2f3b206c7d7","src/readers/data_section.rs":"f572e7d2589f0bccf5e97d43c1ca3aac103cbd47d139ead6b84b39b5c9d47c0b","src/readers/element_section.rs":"0193c9b7be80a0c18cba9f2d2892dba83819339aaf39a44d44003fec5328196c","src/readers/export_section.rs":"7c74f7a11406a95c162f6ad4f77aafd0b1eee309f33b69f06bea12b23925e143","src/readers/function_section.rs":"57c0479ba8d7f61908ed74e86cbc26553fdd6d2d952f032ce29385a39f82efd3","src/readers/global_section.rs":"5fa18bed0fffadcc2dbdcbaedbe4e4398992fd1ce9e611b0319333a7681082ac","src/readers/import_section.rs":"236e754867ad7829b5a95221051daff3c5df971aff9f2339fa11256f2309d209","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/instance_section.rs":"7b78bbca4b79ac7f9c42455815863a4d32dc074c5973ab1035dbfdf88b0d3c12","src/readers/linking_section.rs":"9df71f3ee5356f0d273c099212213353080001e261ca697caddf6b847fb5af09","src/readers/memory_section.rs":"83212f86cfc40d18fb392e9234c880afdf443f4af38a727ba346f9c740ef8718","src/readers/mod.rs":"d80ba76d763b06ae6e570f09a312b20006f0b83b5cd01d6baab496006fe9b7f1","src/readers/module.rs":"04f6e0bb7250f86037f30754d95a2941623272002184f372ed159db19f52dc7e","src/readers/module_code_section.rs":"aa9cf64f65e43ea5fcf9e695b7b1ba5a45b92d537f7ccef379a07162108ce9d9","src/readers/module_section.rs":"7a0c0b34478ec32030c5400df2366914c7e08ba799440a8c5ea999755c489e7f","src/readers/name_section.rs":"23b5106b17744833fb8cd61cb102e756bccb4a44594d34a5dd8b7930307ac4cb","src/readers/operators.rs":"1defc15f364775018ffe8c7f010ff83342c46659f780be4ba88c58fad7606e03","src/readers/producers_section.rs":"674f402fc4545c94487f827153871b37adab44ed5eff4070a436eb18e514023a","src/readers/reloc_section.rs":"0ef818a8b83a4542c4c29c23642436a92d3e7c37bc0248e817ed5a9d65ec38ce","src/readers/section_reader.rs":"f27f017938bb8602954298d053cd3b79d8876f9fcbbe0e1a3380051b6aa4584a","src/readers/sourcemappingurl_section.rs":"eff317f6f2b728a98a5eb68eec7e6cf222d27158d0d5597fd1c84f09b1092a50","src/readers/start_section.rs":"012fe574a5b94ea34c9d689629fb0df2f5ba4c11
c835147b39155f5a8c715e34","src/readers/table_section.rs":"e564876825a7b31df2b5dc850279b523e26dc50a08da935cc8d635a49e809951","src/readers/type_section.rs":"c2f9d7b77a1315d323bebe94ced44dc10b77c0e75c1e367bb594a402c74933ba","src/tests.rs":"5d47ec97d0a303d8cbe905f8ddcf11212a03607e0b245c9f52371464e7d08ee7","src/validator.rs":"bec65fde1d8b98d80d082067a6ccf006f35e3f06062cac97887bd5a04ef75192"},"package":"721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"}
{"files":{"Cargo.lock":"097d8d38fce861128185d43320f25aacc4f306c7a273427f0cc53460d2985c64","Cargo.toml":"38db996a0283398e7353969aa161779c7bd8160c8ced694497b12977b5983c95","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"fd8556367534c5aa04960dc8c053dd50e531d2fbe6234b811d5e3ae95649e463","compare-with-main.sh":"2ddfab71ba571055292a29a36c1ede05f64ba094c78495b945d1486bf32ab6d7","examples/simple.rs":"606072a46c5c80df29da3ecd98a989feb1289243550033cd3c3e1df6045ce8ce","src/binary_reader.rs":"b99ceb7182a2581e8834a9a6a7f351bdf0e0f9fbbbcdc4af3bfb9c506c3fc219","src/lib.rs":"414ed00613d315875c0e84750fa0086a1a67e776a9f40637f547bf99f1cc7f0f","src/limits.rs":"76226dcbb57180a399d1e5dfaed0b2516728ab09d91852e3f9aa3adebf06b3b7","src/module_resources.rs":"3a2137adb9018a5d5ebcaf274f969e650e184f77e5db62cd9b655cc6e97fdee1","src/operators_validator.rs":"5fb8f7611f5bf3115016ae81cc4e1e1e0ac4605725df0c9131f0c5d5d1a9514f","src/parser.rs":"c05a92a04020e990635728101e3f7d6653ccdeb54f2ce3615fc6c058b344cd8e","src/primitives.rs":"f93340797ff49370c13a489dc34832771156bb38541b19cb56e371e9a2e099b2","src/readers/alias_section.rs":"ef6556c3e300549958010aba9f1a0f6852c80ceddc763b4c11463d97234488b3","src/readers/code_section.rs":"ab19a5ed2a72e85e8365a8f6915ebbc10ca07b72a097322c53c36fdfb13bd57c","src/readers/data_section.rs":"d919a22ebc44c53ca434df6c1d16efc8e126e7b55ed99691d5ae73c10bfadfff","src/readers/element_section.rs":"f168a3cb02439aeaa81621525e2747d3bc4743fac2237dcdf8658b67e010ca08","src/readers/export_section.rs":"3fe296f1789e789009a79115052194a1352d947f2a8830945d6b7d9983bb8579","src/readers/function_section.rs":"5467d7a375c22a4cc225819212e616f275ef01516f185b346eae2ffbb5c53cb3","src/readers/global_section.rs":"359450911ac662503f90288798baec2415df6d3b80990a7b75794683df7894b8","src/readers/import_section.rs":"80906451f78c64d31a76772d97c96d18d208eeabaaaf82372b0567a8991795c1","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/instance_section.rs":"0f6cc9ed6bb6520493090eff5bbd6a9030ba1432206799b7dfed0b9285bafd73","src/readers/linking_section.rs":"db3091a48827a5b035e2f79f40f7ed9a7ea10acd4db6ab2bbd01e17a65a4265e","src/readers/memory_section.rs":"67d8457d3167b39fc9ae2c04f3c3e28bc10be97bbdaccd681675fb8d3eba2bd3","src/readers/mod.rs":"0fbaa4e1d7e3e68d8857fd8b3ce5c3fba886a1b460cd37398afcbf4802280b4e","src/readers/module_code_section.rs":"806eea527c84570ca6c5b8ca556f95727edae7da29e4f384b067113231b8e5f5","src/readers/module_section.rs":"6e28be8f1f4d2f1a9470ec356c46a76c2f21916143a1f0e245b96d2272d0301e","src/readers/name_section.rs":"60d4aa007cfdc16eedc1b4cb0bee560f6eebd82aaa81e9be9c844e515b16e445","src/readers/operators.rs":"3800f0321a776ddc5e8fb030828e4f2a65ebafa4b7f0808774384559ddfe49ea","src/readers/producers_section.rs":"77f93449e4bdcd61e4c79e47a685742f49cd5dac837ba002bce14120f14c9470","src/readers/reloc_section.rs":"e48e6acaa5145d6fbe1d74eb406ee59c43235faa47fbf0b07288504e60573a5e","src/readers/section_reader.rs":"e99763ce9c48994fd1e92f011a449936c4206a5c91d50fa580d003b6cc824ec5","src/readers/table_section.rs":"5d94185f68c4c7526a8836a2ebdb5b20fe754af58b68d2d0eb8fea62b7e6fe71","src/readers/type_section.rs":"87a54d238bf900aac0d0508e5c644d71b1e591df99367587feb68146a25a5a61","src/validator.rs":"8d401bdac4f8ecdd477a9fe5b979d82b49c4b988194a9a4f1253bafd15e36b11","src/validator/func.rs":"69508e0b0cfde783ca3635070496573b65a4b3ce9c39fe5afb0b6af19e346b2b"},"package":"a950e6a618f62147fd514ff445b2a0b53120d382751960797f85f058c7eda9b9"}
@@ -208,9 +208,9 @@ checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177"

[[package]]
name = "hermit-abi"
-version = "0.1.14"
+version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909"
+checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
dependencies = [
 "libc",
]
@@ -247,9 +247,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
-version = "0.2.71"
+version = "0.2.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
+checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701"

[[package]]
name = "log"
@@ -274,9 +274,9 @@ checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"

[[package]]
name = "memoffset"
-version = "0.5.4"
+version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8"
+checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f"
dependencies = [
 "autocfg",
]
@@ -470,9 +470,9 @@ dependencies = [

[[package]]
name = "syn"
-version = "1.0.33"
+version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd"
+checksum = "936cae2873c940d92e697597c5eee105fb570cd5689c695806f672883653349b"
dependencies = [
 "proc-macro2",
 "quote",
@@ -577,7 +577,7 @@ checksum = "7f7b90ea6c632dd06fd765d44542e234d5e63d9bb917ecd64d79778a13bd79ae"

[[package]]
name = "wasmparser"
-version = "0.58.0"
+version = "0.59.0"
dependencies = [
 "anyhow",
 "criterion",