Mirror of https://github.com/mozilla/gecko-dev.git

Bug 1641504: Bump Cranelift to e3d89c8a92a5fadedd75359b8485d23ac45ecf29. r=bbouvier

Differential Revision: https://phabricator.services.mozilla.com/D78587

Parent: e12b4c3ef8
Commit: 6944be383a

@@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
 [source."https://github.com/bytecodealliance/wasmtime"]
 git = "https://github.com/bytecodealliance/wasmtime"
 replace-with = "vendored-sources"
-rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
+rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

 [source."https://github.com/badboy/failure"]
 git = "https://github.com/badboy/failure"

@@ -763,22 +763,22 @@ dependencies = [

 [[package]]
 name = "cranelift-bforest"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
 dependencies = [
-"cranelift-entity 0.63.0",
+"cranelift-entity 0.64.0",
 ]

 [[package]]
 name = "cranelift-codegen"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
 dependencies = [
 "byteorder",
 "cranelift-bforest",
 "cranelift-codegen-meta",
 "cranelift-codegen-shared",
-"cranelift-entity 0.63.0",
+"cranelift-entity 0.64.0",
 "log",
 "regalloc",
 "smallvec",
@@ -788,17 +788,17 @@ dependencies = [

 [[package]]
 name = "cranelift-codegen-meta"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
 dependencies = [
 "cranelift-codegen-shared",
-"cranelift-entity 0.63.0",
+"cranelift-entity 0.64.0",
 ]

 [[package]]
 name = "cranelift-codegen-shared"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

 [[package]]
 name = "cranelift-entity"
@@ -807,13 +807,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9

 [[package]]
 name = "cranelift-entity"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

 [[package]]
 name = "cranelift-frontend"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
 dependencies = [
 "cranelift-codegen",
 "log",
@@ -823,15 +823,15 @@ dependencies = [

 [[package]]
 name = "cranelift-wasm"
-version = "0.63.0"
-source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
+version = "0.64.0"
+source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
 dependencies = [
 "cranelift-codegen",
-"cranelift-entity 0.63.0",
+"cranelift-entity 0.64.0",
 "cranelift-frontend",
 "log",
 "thiserror",
-"wasmparser 0.51.4",
+"wasmparser 0.57.0",
 ]

 [[package]]
@@ -3956,9 +3956,9 @@ dependencies = [

 [[package]]
 name = "regalloc"
-version = "0.0.21"
+version = "0.0.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b27b256b41986ac5141b37b8bbba85d314fbf546c182eb255af6720e07e4f804"
+checksum = "cca5b48c9db66c5ba084e4660b4c0cfe8b551a96074bc04b7c11de86ad0bf1f9"
 dependencies = [
 "log",
 "rustc-hash",
@@ -5375,9 +5375,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"

 [[package]]
 name = "wasmparser"
-version = "0.51.4"
+version = "0.57.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aeb1956b19469d1c5e63e459d29e7b5aa0f558d9f16fcef09736f8a265e6c10a"
+checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"

 [[package]]
 name = "wast"

@@ -76,8 +76,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f

 [patch.crates-io.cranelift-codegen]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
+rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

 [patch.crates-io.cranelift-wasm]
 git = "https://github.com/bytecodealliance/wasmtime"
-rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
+rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

@@ -13,8 +13,8 @@ name = "baldrdash"
 # cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
 # you should update the following $TOP_LEVEL/Cargo.toml file: look for the
 # revision (rev) hashes of both cranelift dependencies (codegen and wasm).
-cranelift-codegen = { version = "0.63.0", default-features = false }
-cranelift-wasm = "0.63.0"
+cranelift-codegen = { version = "0.64.0", default-features = false }
+cranelift-wasm = "0.64.0"
 log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
 env_logger = "0.6"
 smallvec = "1.0"

@ -1 +1 @@
{"files":{"Cargo.toml":"07d7670bb6f0c26fa3abb5d547d645b8b6ab32378dba33e3453122c8ba59c6b5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"fe108380fdfaac0d92a92302d0751df182b888e874e56e465f4241dbb670a92e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-bforest"
|
||||
version = "0.63.0"
|
||||
version = "0.64.0"
|
||||
description = "A forest of B+-trees"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-bforest"
|
||||
|
@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
|
|||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cranelift-entity = { path = "../entity", version = "0.63.0", default-features = false }
|
||||
cranelift-entity = { path = "../entity", version = "0.64.0", default-features = false }
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"2d1fae4231bb7d3c43ebcaccbc62d243440ab537a5b6bd40c653ece0bcda5a75","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"b275053977c0239211c1df35253154ba4dce2519f506088e71104de37d3db862","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"fd5a7418fa0d47cdf1b823b31553f1549c03e160ffffac9e22d611185774367e","src/isa/x86/encodings.rs":"a19e5dd7ba7fe74f2ec0a2367e61e2dab498113f8b2a2f1bc677b6ee486358d5","src/isa/x86/instructions.rs":"144e83591444115f2ab8d16777e322eb5c9d8eef123ad05d0c66811a029b662b","src/isa/x86/legalize.rs":"d2eb6cee5c885870250417f4d9086527c96f994542c9316baf14776b500e45b0","src/isa/x86/mod.rs":"65953f998ff3fc3b333167e9979fc0f15f976b51ad75272ac19dcaad0981b371","src/isa/x86/opcodes.rs":"44556abfc4a319a6e48aa878f10550b7878725ba0bf75ddc9bb6a0e6f4223c73","src/isa/x86/recipes.rs":"f142ae4ea1db29df0f3c9aedf0c5ee228682136526499f0c85aab101375d0c8c","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"49abb46533b3a5415cd033e0a98b5c9561e231f2dd9510d587dc69b204bb6706","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"89ed4074f748637adf56b93ba952e398c45d43e6326d01676885939e3fe8bc4a","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"8df3abeb47b52b7dc99f6e0bb16cf8a695ce4fe0a8d86035945a2612d1aa5a6d","src/shared/legalize.rs":"bc9c3292446c1d338df1c4ce19f3ac5482cfe582a04a5a1e82fc9aaa6aef25ea","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"9460758f04ccfc9129ea4d4081571fe4a3ac574c3d25b6473f888fbbb506b9d3","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
|
||||
{"files":{"Cargo.toml":"a19ba59829e25d67120787a454038986a6759f7d592dcf427924ebbcb5de6697","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"87c70a4856bb1c40ba6babed549aa7e01478375244dea605be0334ae6d0441e0","src/isa/x86/instructions.rs":"a2c81ff80e30980fe444aa1e56ba57c54911cee67c392c16bfbdf28f75151dc6","src/isa/x86/legalize.rs":"b5f68ea089c4237c7140ef0b8ff71f7c6a5f53884bf2158d81b52d3750bcacac","src/isa/x86/mod.rs":"ecc1d4de51bd44dbaa864fafebb68f66bc99fb8c9ad67a0fcb420bd1f87d1524","src/isa/x86/opcodes.rs":"f98dd104910efbfa3c211080c68a17da607ce585b9d81bf22cb255e58e51f99f","src/isa/x86/recipes.rs":"b71a3746ed39b08932dc1a0ce885b61eec2e8daf2e92d12eccc0d085e4587a1f","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"69623c2193458c838617e52e88d3ff91b71f3f07aec1f1494c0cabd7c332ad49","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38b9a3b09bd86d020b841abe94eef003063b2cb12d9dc991a7743b2cc0bb3362","src/shared/legalize.rs":"55b186e09383cc16491a6a0dd79aa9149c1aba1927a7173701478818b8116795","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"0b4f903de5f2df19304c44bf4bd456c3a8e165103b38ccb13b6f88ae8a3c7ee8","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
|
|
@ -1,19 +1,19 @@
|
|||
[package]
|
||||
name = "cranelift-codegen-meta"
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
version = "0.63.0"
|
||||
version = "0.64.0"
|
||||
description = "Metaprogram for cranelift-codegen code generator library"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
readme = "README.md"
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
rustdoc-args = [ "--document-private-items" ]
|
||||
|
||||
[dependencies]
|
||||
cranelift-codegen-shared = { path = "../shared", version = "0.63.0" }
|
||||
cranelift-entity = { path = "../../entity", version = "0.63.0" }
|
||||
cranelift-codegen-shared = { path = "../shared", version = "0.64.0" }
|
||||
cranelift-entity = { path = "../../entity", version = "0.64.0" }
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
rustdoc-args = [ "--document-private-items" ]
|
||||
|
|
|
@@ -874,17 +874,32 @@ fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) {
 args.join(", ")
 );

+let imms_need_sign_extension = format
+.imm_fields
+.iter()
+.any(|f| f.kind.rust_type == "ir::immediates::Imm64");
+
 fmt.doc_comment(format.to_string());
 fmt.line("#[allow(non_snake_case)]");
 fmtln!(fmt, "fn {} {{", proto);
 fmt.indent(|fmt| {
 // Generate the instruction data.
-fmtln!(fmt, "let data = ir::InstructionData::{} {{", format.name);
+fmtln!(
+fmt,
+"let{} data = ir::InstructionData::{} {{",
+if imms_need_sign_extension { " mut" } else { "" },
+format.name
+);
 fmt.indent(|fmt| {
 fmt.line("opcode,");
 gen_member_inits(format, fmt);
 });
 fmtln!(fmt, "};");
+
+if imms_need_sign_extension {
+fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);");
+}
+
 fmt.line("self.build(data, ctrl_typevar)");
 });
 fmtln!(fmt, "}");

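Note on the change above: the generator now emits a data.sign_extend_immediates(ctrl_typevar) call whenever an instruction format carries an ir::immediates::Imm64 field. A minimal standalone sketch of what sign-extending an immediate to the controlling type's width means (illustrative only, not Cranelift's actual helper; the function name here is hypothetical):

// Hypothetical helper showing what "sign-extend an Imm64 to the controlling
// type" means: keep the low `ctrl_type_bits` bits of the raw immediate and
// sign-extend them back to 64 bits, so 0xFF seen as an 8-bit value becomes -1.
fn sign_extend_imm64(raw: i64, ctrl_type_bits: u32) -> i64 {
    assert!((1..=64).contains(&ctrl_type_bits));
    if ctrl_type_bits == 64 {
        return raw;
    }
    let shift = 64 - ctrl_type_bits;
    (raw << shift) >> shift
}

fn main() {
    assert_eq!(sign_extend_imm64(0xFF, 8), -1);
    assert_eq!(sign_extend_imm64(0x7F, 8), 127);
    assert_eq!(sign_extend_imm64(0xFFFF_FFFF, 32), -1);
}
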
@ -64,7 +64,7 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
|
|||
|
||||
// R-type with an immediate shift amount instead of rs2.
|
||||
recipes.push(
|
||||
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4)
|
||||
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![gpr])
|
||||
.emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
|
||||
|
@ -79,11 +79,11 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
|
|||
);
|
||||
|
||||
recipes.push(
|
||||
EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4)
|
||||
EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![gpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm,
|
||||
&*formats.binary_imm64,
|
||||
"imm",
|
||||
12,
|
||||
0,
|
||||
|
|
|
@ -689,6 +689,12 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
|
|||
}
|
||||
}
|
||||
}
|
||||
for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
|
||||
e.enc_both(
|
||||
bint.bind(*to).bind(*from),
|
||||
rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
|
||||
);
|
||||
}
|
||||
|
||||
// Copy Special
|
||||
// For x86-64, only define REX forms for now, since we can't describe the
|
||||
|
@ -1448,6 +1454,7 @@ fn define_alu(
|
|||
// x86 has a bitwise not instruction NOT.
|
||||
e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
|
||||
e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
|
||||
e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));
|
||||
|
||||
// Also add a `b1` encodings for the logic instructions.
|
||||
// TODO: Should this be done with 8-bit instructions? It would improve partial register
|
||||
|
@ -1487,8 +1494,13 @@ fn define_alu(
|
|||
for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
|
||||
// Cannot use enc_i32_i64 for this pattern because instructions require
|
||||
// to bind any.
|
||||
e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
|
||||
e.enc32(
|
||||
inst.bind(I32).bind(Any),
|
||||
inst.bind(I32).bind(I16),
|
||||
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
|
||||
);
|
||||
e.enc32(
|
||||
inst.bind(I32).bind(I32),
|
||||
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
|
||||
);
|
||||
e.enc64(
|
||||
|
@ -1601,8 +1613,11 @@ fn define_simd(
|
|||
let sadd_sat = shared.by_name("sadd_sat");
|
||||
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
||||
let sload8x8 = shared.by_name("sload8x8");
|
||||
let sload8x8_complex = shared.by_name("sload8x8_complex");
|
||||
let sload16x4 = shared.by_name("sload16x4");
|
||||
let sload16x4_complex = shared.by_name("sload16x4_complex");
|
||||
let sload32x2 = shared.by_name("sload32x2");
|
||||
let sload32x2_complex = shared.by_name("sload32x2_complex");
|
||||
let spill = shared.by_name("spill");
|
||||
let sqrt = shared.by_name("sqrt");
|
||||
let sshr_imm = shared.by_name("sshr_imm");
|
||||
|
@ -1611,11 +1626,15 @@ fn define_simd(
|
|||
let store_complex = shared.by_name("store_complex");
|
||||
let uadd_sat = shared.by_name("uadd_sat");
|
||||
let uload8x8 = shared.by_name("uload8x8");
|
||||
let uload8x8_complex = shared.by_name("uload8x8_complex");
|
||||
let uload16x4 = shared.by_name("uload16x4");
|
||||
let uload16x4_complex = shared.by_name("uload16x4_complex");
|
||||
let uload32x2 = shared.by_name("uload32x2");
|
||||
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||
let ushr_imm = shared.by_name("ushr_imm");
|
||||
let usub_sat = shared.by_name("usub_sat");
|
||||
let vconst = shared.by_name("vconst");
|
||||
let vselect = shared.by_name("vselect");
|
||||
let x86_insertps = x86.by_name("x86_insertps");
|
||||
let x86_movlhps = x86.by_name("x86_movlhps");
|
||||
let x86_movsd = x86.by_name("x86_movsd");
|
||||
|
@ -1626,6 +1645,8 @@ fn define_simd(
|
|||
let x86_pmaxu = x86.by_name("x86_pmaxu");
|
||||
let x86_pmins = x86.by_name("x86_pmins");
|
||||
let x86_pminu = x86.by_name("x86_pminu");
|
||||
let x86_pmullq = x86.by_name("x86_pmullq");
|
||||
let x86_pmuludq = x86.by_name("x86_pmuludq");
|
||||
let x86_pshufb = x86.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86.by_name("x86_pshufd");
|
||||
let x86_psll = x86.by_name("x86_psll");
|
||||
|
@ -1636,6 +1657,7 @@ fn define_simd(
|
|||
let x86_punpckl = x86.by_name("x86_punpckl");
|
||||
|
||||
// Shorthands for recipes.
|
||||
let rec_blend = r.template("blend");
|
||||
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
||||
let rec_f_ib = r.template("f_ib");
|
||||
let rec_fa = r.template("fa");
|
||||
|
@ -1705,6 +1727,20 @@ fn define_simd(
|
|||
e.enc_both_inferred(instruction, template);
|
||||
}
|
||||
|
||||
// SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
|
||||
// either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB;
|
||||
// for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let opcode = match ty.lane_bits() {
|
||||
32 => &BLENDVPS,
|
||||
64 => &BLENDVPD,
|
||||
_ => &PBLENDVB,
|
||||
};
|
||||
let instruction = vselect.bind(vector(ty, sse_vector_size));
|
||||
let template = rec_blend.opcodes(opcode);
|
||||
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
|
||||
}
|
||||
|
||||
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
|
||||
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
|
||||
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
|
||||
|
@ -1977,6 +2013,35 @@ fn define_simd(
|
|||
}
|
||||
}
|
||||
|
||||
// SIMD load extend (complex addressing)
|
||||
let is_load_complex_length_two =
|
||||
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
|
||||
for (inst, opcodes) in &[
|
||||
(uload8x8_complex, &PMOVZXBW),
|
||||
(uload16x4_complex, &PMOVZXWD),
|
||||
(uload32x2_complex, &PMOVZXDQ),
|
||||
(sload8x8_complex, &PMOVSXBW),
|
||||
(sload16x4_complex, &PMOVSXWD),
|
||||
(sload32x2_complex, &PMOVSXDQ),
|
||||
] {
|
||||
for recipe in &[
|
||||
rec_fldWithIndex,
|
||||
rec_fldWithIndexDisp8,
|
||||
rec_fldWithIndexDisp32,
|
||||
] {
|
||||
let template = recipe.opcodes(*opcodes);
|
||||
let predicate = |encoding: EncodingBuilder| {
|
||||
encoding
|
||||
.isa_predicate(use_sse41_simd)
|
||||
.inst_predicate(is_load_complex_length_two.clone())
|
||||
};
|
||||
e.enc32_func(inst.clone(), template.clone(), predicate);
|
||||
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
|
||||
e.enc64_func(inst.clone(), template.rex(), predicate);
|
||||
e.enc64_func(inst.clone(), template, predicate);
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD integer addition
|
||||
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
|
||||
let iadd = iadd.bind(vector(*ty, sse_vector_size));
|
||||
|
@ -2036,12 +2101,14 @@ fn define_simd(
|
|||
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
|
||||
}
|
||||
|
||||
// SIMD multiplication with lane expansion.
|
||||
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
|
||||
|
||||
// SIMD integer multiplication for I64x2 using a AVX512.
|
||||
{
|
||||
let imul = imul.bind(vector(I64, sse_vector_size));
|
||||
e.enc_32_64_maybe_isap(
|
||||
imul,
|
||||
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
|
||||
x86_pmullq,
|
||||
rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
|
||||
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
|
||||
);
|
||||
}
|
||||
|
@ -2117,8 +2184,11 @@ fn define_simd(
|
|||
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
|
||||
|
||||
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
|
||||
// One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set.
|
||||
if *ty != I64 {
|
||||
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD integer comparisons
|
||||
|
@ -2223,8 +2293,7 @@ fn define_entity_ref(
|
|||
let rec_gvaddr8 = r.template("gvaddr8");
|
||||
let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
|
||||
let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
|
||||
let rec_spaddr4_id = r.template("spaddr4_id");
|
||||
let rec_spaddr8_id = r.template("spaddr8_id");
|
||||
let rec_spaddr_id = r.template("spaddr_id");
|
||||
|
||||
// Predicates shorthands.
|
||||
let all_ones_funcaddrs_and_not_is_pic =
|
||||
|
@ -2312,8 +2381,8 @@ fn define_entity_ref(
|
|||
//
|
||||
// TODO: Add encoding rules for stack_load and stack_store, so that they
|
||||
// don't get legalized to stack_addr + load/store.
|
||||
e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
|
||||
e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());
|
||||
e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
|
||||
e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
|
||||
|
||||
// Constant addresses (PIC).
|
||||
e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
|
||||
|
|
|
@@ -283,7 +283,7 @@ pub(crate) fn define(
 Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
 register and re-orders the data according to the passed immediate byte.
 "#,
-&formats.extract_lane,
+&formats.binary_imm8,
 )
 .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
 .operands_out(vec![a]),

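An aside on the instruction documented here: a minimal standalone model of the PSHUFD reordering that the doc comment describes (assumed semantics, provided only for illustration, not code from this patch):

// Each of the four result doublewords is chosen from the source by a 2-bit
// field of the immediate byte, which is what the docstring above refers to.
fn pshufd(src: [u32; 4], imm: u8) -> [u32; 4] {
    let sel = |i: u8| src[((imm >> (2 * i)) & 0b11) as usize];
    [sel(0), sel(1), sel(2), sel(3)]
}

fn main() {
    let v = [10, 20, 30, 40];
    // 0b00_01_10_11 selects source lanes 3, 2, 1, 0 for result lanes 0..=3.
    assert_eq!(pshufd(v, 0b00_01_10_11), [40, 30, 20, 10]);
}
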
@ -314,7 +314,7 @@ pub(crate) fn define(
|
|||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.extract_lane,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -342,9 +342,9 @@ pub(crate) fn define(
|
|||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.insert_lane,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx, y])
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
|
@ -369,9 +369,9 @@ pub(crate) fn define(
|
|||
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
|
||||
floats, which are already stored in an XMM register.
|
||||
"#,
|
||||
&formats.insert_lane,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx, y])
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
|
@ -475,10 +475,11 @@ pub(crate) fn define(
|
|||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
|
||||
the lower lane to form the integer)",
|
||||
let I128 = &TypeVar::new(
|
||||
"I128",
|
||||
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
|
||||
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
|
||||
upper lane is concatenated with the lower lane to form the integer)",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
|
@ -487,7 +488,7 @@ pub(crate) fn define(
|
|||
);
|
||||
|
||||
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
|
||||
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
|
||||
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
|
||||
let a = &Operand::new("a", IxN);
|
||||
|
||||
ig.push(
|
||||
|
@ -532,6 +533,47 @@ pub(crate) fn define(
|
|||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing two 64-bit integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I64x2);
|
||||
let y = &Operand::new("y", I64x2);
|
||||
let a = &Operand::new("a", I64x2);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmullq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
|
||||
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
|
||||
encodings for CPUs with newer vector features.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmuludq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
|
||||
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
|
||||
overflow as in `x86_pmullq`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let f = &Operand::new("f", iflags);
|
||||
|
|
|
@ -8,7 +8,7 @@ use crate::shared::Definitions as SharedDefinitions;
|
|||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
|
||||
let mut group = TransformGroupBuilder::new(
|
||||
let mut expand = TransformGroupBuilder::new(
|
||||
"x86_expand",
|
||||
r#"
|
||||
Legalize instructions by expansion.
|
||||
|
@ -18,6 +18,37 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("expand_flags").id);
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
let mut narrow_avx = TransformGroupBuilder::new(
|
||||
"x86_narrow_avx",
|
||||
r#"
|
||||
Legalize instructions by narrowing with CPU feature checks.
|
||||
|
||||
This special case converts using x86 AVX instructions where available."#,
|
||||
)
|
||||
.isa("x86");
|
||||
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
|
||||
// function for where this is chained.
|
||||
|
||||
let mut widen = TransformGroupBuilder::new(
|
||||
"x86_widen",
|
||||
r#"
|
||||
Legalize instructions by widening.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("widen").id);
|
||||
|
||||
// List of instructions.
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
|
@ -37,6 +68,8 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let isub = insts.by_name("isub");
|
||||
let ishl = insts.by_name("ishl");
|
||||
let ireduce = insts.by_name("ireduce");
|
||||
let popcnt = insts.by_name("popcnt");
|
||||
let sdiv = insts.by_name("sdiv");
|
||||
let selectif = insts.by_name("selectif");
|
||||
|
@ -45,6 +78,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let tls_value = insts.by_name("tls_value");
|
||||
let udiv = insts.by_name("udiv");
|
||||
let umulhi = insts.by_name("umulhi");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let urem = insts.by_name("urem");
|
||||
|
||||
|
@@ -55,14 +89,40 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

 let imm = &shared.imm;

+// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
+// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
+// not encodable.
+let a = var("a");
+let x = var("x");
+let y = var("y");
+let z = var("z");
+
+for &ty in &[I8, I16, I32] {
+let ishl_by_i64 = ishl.bind(ty).bind(I64);
+let ireduce = ireduce.bind(I32);
+expand.legalize(
+def!(a = ishl_by_i64(x, y)),
+vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
+);
+}
+
+for &ty in &[I8, I16, I32] {
+let ushr_by_i64 = ushr.bind(ty).bind(I64);
+let ireduce = ireduce.bind(I32);
+expand.legalize(
+def!(a = ushr_by_i64(x, y)),
+vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
+);
+}
+
 // Division and remainder.
 //
 // The srem expansion requires custom code because srem INT_MIN, -1 is not
 // allowed to trap. The other ops need to check avoid_div_traps.
-group.custom_legalize(sdiv, "expand_sdivrem");
-group.custom_legalize(srem, "expand_sdivrem");
-group.custom_legalize(udiv, "expand_udivrem");
-group.custom_legalize(urem, "expand_udivrem");
+expand.custom_legalize(sdiv, "expand_sdivrem");
+expand.custom_legalize(srem, "expand_sdivrem");
+expand.custom_legalize(udiv, "expand_udivrem");
+expand.custom_legalize(urem, "expand_udivrem");

 // Double length (widening) multiplication.
 let a = var("a");

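A standalone sketch of the invariant the new ishl/ushr legalizations above rely on (assuming Cranelift's usual shift semantics, where the count is taken modulo the bit width of the shifted value): truncating a 64-bit shift count with ireduce cannot change the result for an I32-or-narrower value, so the I64 count never has to be materialized on x86_32.

// Model of "shift an i32 by an i64 count": only count mod 32 matters, so the
// low 32 bits produced by ireduce carry all the information the shift needs.
fn ishl_i32(x: u32, count: u64) -> u32 {
    x.wrapping_shl((count % 32) as u32)
}

fn main() {
    let x = 0x1234_5678u32;
    let wide: u64 = (1 << 40) | 3; // bits above the low 32 cannot affect count mod 32
    let reduced = wide as u32; // what ireduce.i32 would produce
    assert_eq!(ishl_i32(x, wide), ishl_i32(x, u64::from(reduced)));
}
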
@ -73,12 +133,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let res_lo = var("res_lo");
|
||||
let res_hi = var("res_hi");
|
||||
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(res_hi = umulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
|
||||
);
|
||||
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(res_hi = smulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
|
||||
);
|
||||
|
@ -97,7 +157,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
|
||||
|
||||
// Equality needs an explicit `ord` test which checks the parity bit.
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_eq, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_ord, x, y)),
|
||||
|
@ -105,7 +165,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
def!(a = band(a1, a2)),
|
||||
],
|
||||
);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_ne, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_uno, x, y)),
|
||||
|
@ -130,20 +190,20 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
(floatcc_ugt, floatcc_ult),
|
||||
(floatcc_uge, floatcc_ule),
|
||||
] {
|
||||
group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
|
||||
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
|
||||
}
|
||||
|
||||
// We need to modify the CFG for min/max legalization.
|
||||
group.custom_legalize(fmin, "expand_minmax");
|
||||
group.custom_legalize(fmax, "expand_minmax");
|
||||
expand.custom_legalize(fmin, "expand_minmax");
|
||||
expand.custom_legalize(fmax, "expand_minmax");
|
||||
|
||||
// Conversions from unsigned need special handling.
|
||||
group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
|
||||
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
|
||||
// Conversions from float to int can trap and modify the control flow graph.
|
||||
group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
|
||||
group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
|
||||
group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
|
||||
group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
|
||||
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
|
||||
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
|
||||
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
|
||||
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
|
||||
|
||||
// Count leading and trailing zeroes, for baseline x86_64
|
||||
let c_minus_one = var("c_minus_one");
|
||||
|
@ -158,7 +218,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
|
||||
let imm64_63 = Literal::constant(&imm.imm64, 63);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = clz.I64(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
|
@ -170,7 +230,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
);
|
||||
|
||||
let imm64_31 = Literal::constant(&imm.imm64, 31);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = clz.I32(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
|
@ -182,7 +242,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
);
|
||||
|
||||
let imm64_64 = Literal::constant(&imm.imm64, 64);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = ctz.I64(x)),
|
||||
vec![
|
||||
def!(c_sixty_four = iconst(imm64_64)),
|
||||
|
@ -192,7 +252,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
);
|
||||
|
||||
let imm64_32 = Literal::constant(&imm.imm64, 32);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(a = ctz.I32(x)),
|
||||
vec![
|
||||
def!(c_thirty_two = iconst(imm64_32)),
|
||||
|
@ -225,7 +285,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
|
||||
let imm64_1 = Literal::constant(&imm.imm64, 1);
|
||||
let imm64_4 = Literal::constant(&imm.imm64, 4);
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I64(x)),
|
||||
vec![
|
||||
def!(qv3 = ushr_imm(x, imm64_1)),
|
||||
|
@ -266,7 +326,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
let lc0F = var("lc0F");
|
||||
let lc01 = var("lc01");
|
||||
|
||||
group.legalize(
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I32(x)),
|
||||
vec![
|
||||
def!(lv3 = ushr_imm(x, imm64_1)),
|
||||
|
@ -289,31 +349,27 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||
],
|
||||
);
|
||||
|
||||
group.custom_legalize(ineg, "convert_ineg");
|
||||
|
||||
group.custom_legalize(tls_value, "expand_tls_value");
|
||||
|
||||
group.build_and_add_to(&mut shared.transform_groups);
|
||||
|
||||
let mut widen = TransformGroupBuilder::new(
|
||||
"x86_widen",
|
||||
r#"
|
||||
Legalize instructions by widening.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("widen").id);
|
||||
|
||||
expand.custom_legalize(ineg, "convert_ineg");
|
||||
expand.custom_legalize(tls_value, "expand_tls_value");
|
||||
widen.custom_legalize(ineg, "convert_ineg");
|
||||
widen.build_and_add_to(&mut shared.transform_groups);
|
||||
|
||||
// To reduce compilation times, separate out large blocks of legalizations by
|
||||
// theme.
|
||||
define_simd(shared, x86_instructions);
|
||||
// To reduce compilation times, separate out large blocks of legalizations by theme.
|
||||
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
|
||||
|
||||
expand.build_and_add_to(&mut shared.transform_groups);
|
||||
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
|
||||
narrow_avx
|
||||
.chain_with(narrow_id)
|
||||
.build_and_add_to(&mut shared.transform_groups);
|
||||
widen.build_and_add_to(&mut shared.transform_groups);
|
||||
}
|
||||
|
||||
fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
|
||||
fn define_simd(
|
||||
shared: &mut SharedDefinitions,
|
||||
x86_instructions: &InstructionGroup,
|
||||
narrow: &mut TransformGroupBuilder,
|
||||
narrow_avx: &mut TransformGroupBuilder,
|
||||
) {
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
let band_not = insts.by_name("band_not");
|
||||
|
@ -330,6 +386,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
let icmp = insts.by_name("icmp");
|
||||
let imax = insts.by_name("imax");
|
||||
let imin = insts.by_name("imin");
|
||||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let insertlane = insts.by_name("insertlane");
|
||||
let ishl = insts.by_name("ishl");
|
||||
|
@ -349,6 +406,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_packss = x86_instructions.by_name("x86_packss");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
|
@ -364,16 +422,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
|
||||
let imm = &shared.imm;
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
// Set up variables and immediates.
|
||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||
|
@ -430,7 +478,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
// Move into the lowest 16 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Insert the value again but in the next lowest 16 bits.
|
||||
def!(b = insertlane(a, uimm8_one, x)),
|
||||
def!(b = insertlane(a, x, uimm8_one)),
|
||||
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
|
||||
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
|
@ -464,7 +512,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
// Move into the lowest 64 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Move into the highest 64 bits of the same XMM register.
|
||||
def!(y = insertlane(a, uimm8_one, x)),
|
||||
def!(y = insertlane(a, x, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
@ -493,8 +541,8 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||
);
|
||||
}
|
||||
|
||||
// SIMD shift right (arithmetic)
|
||||
for ty in &[I16, I32, I64] {
|
||||
// SIMD shift right (arithmetic, i16x8 and i32x4)
|
||||
for ty in &[I16, I32] {
|
||||
let sshr = sshr.bind(vector(*ty, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
narrow.legalize(
|
||||
|
@@ -502,6 +550,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
 vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
 );
 }
+// SIMD shift right (arithmetic, i8x16)
 {
 let sshr = sshr.bind(vector(I8, sse_vector_size));
 let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));

@@ -526,6 +575,25 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
 ],
 );
 }
+// SIMD shift right (arithmetic, i64x2)
+{
+let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
+let sshr_scalar_lane0 = sshr.bind(I64);
+let sshr_scalar_lane1 = sshr.bind(I64);
+narrow.legalize(
+def!(z = sshr_vector(x, y)),
+vec![
+// Use scalar operations to shift the first lane.
+def!(a = extractlane(x, uimm8_zero)),
+def!(b = sshr_scalar_lane0(a, y)),
+def!(c = insertlane(x, b, uimm8_zero)),
+// Do the same for the second lane.
+def!(d = extractlane(x, uimm8_one)),
+def!(e = sshr_scalar_lane1(d, y)),
+def!(z = insertlane(c, e, uimm8_one)),
+],
+);
+}

 // SIMD select
 for ty in ValueType::all_lane_types().filter(allowed_simd_type) {

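For reference, a standalone model of the value computed by the i64x2 sshr legalization above (the real lowering goes through extractlane/insertlane exactly as written; this only restates the lane-wise arithmetic it reproduces):

// Arithmetic shift right applied independently to each 64-bit lane, with the
// count masked to the lane width as for the scalar sshr used above.
fn sshr_i64x2(v: [i64; 2], count: u32) -> [i64; 2] {
    let amt = count % 64;
    [v[0] >> amt, v[1] >> amt]
}

fn main() {
    assert_eq!(sshr_i64x2([-8, 16], 2), [-2, 4]);
}
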
@@ -540,6 +608,17 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
 );
 }

+// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
+// This works, because each lane of boolean vector is filled with zeroes or ones.
+for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+let vselect = vselect.bind(vector(ty, sse_vector_size));
+let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
+narrow.legalize(
+def!(d = vselect(c, x, y)),
+vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
+);
+}
+
 // SIMD vany_true
 let ne = Literal::enumerator_for(&imm.intcc, "ne");
 for ty in ValueType::all_lane_types().filter(allowed_simd_type) {

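A standalone illustration of why the vselect-to-bitselect rewrite above is sound under the stated assumption that every lane of the boolean control vector is all ones or all zeros: bitwise selection then picks whole lanes, which is exactly lane-wise vselect.

// bitselect takes bits from `a` where the mask bit is 1 and from `b` where it is 0.
fn bitselect(mask: u64, a: u64, b: u64) -> u64 {
    (a & mask) | (b & !mask)
}

fn main() {
    // Two 32-bit "lanes" packed in a u64: the low lane's mask is all ones and the
    // high lane's mask is all zeros, so the result takes the low lane from `a`
    // and the high lane from `b`, i.e. lane-wise selection.
    let mask = 0x0000_0000_FFFF_FFFFu64;
    let a = 0x1111_1111_2222_2222u64;
    let b = 0x3333_3333_4444_4444u64;
    assert_eq!(bitselect(mask, a, b), 0x3333_3333_2222_2222);
}
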
@@ -709,5 +788,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
 narrow.custom_legalize(ushr, "convert_ushr");
 narrow.custom_legalize(ishl, "convert_ishl");

-narrow.build_and_add_to(&mut shared.transform_groups);
+// This lives in the expand group to avoid conflicting with, e.g., i128 legalizations.
+narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
 }

@ -1,6 +1,6 @@
|
|||
use crate::cdsl::cpu_modes::CpuMode;
|
||||
use crate::cdsl::isa::TargetIsa;
|
||||
use crate::cdsl::types::ReferenceType;
|
||||
use crate::cdsl::types::{ReferenceType, VectorType};
|
||||
|
||||
use crate::shared::types::Bool::B1;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
|
@ -35,6 +35,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
|||
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
|
||||
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
|
||||
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
|
||||
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
|
||||
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
|
||||
|
||||
x86_32.legalize_monomorphic(expand_flags);
|
||||
|
@ -46,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
|||
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
|
||||
x86_32.legalize_type(F32, x86_expand);
|
||||
x86_32.legalize_type(F64, x86_expand);
|
||||
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
|
||||
x86_64.legalize_monomorphic(expand_flags);
|
||||
x86_64.legalize_default(x86_narrow);
|
||||
|
@ -57,6 +59,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
|||
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
|
||||
x86_64.legalize_type(F32, x86_expand);
|
||||
x86_64.legalize_type(F64, x86_expand);
|
||||
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
|
||||
let recipes = recipes::define(shared_defs, &settings, ®s);
|
||||
|
||||
|
|
|
@@ -54,6 +54,14 @@ pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
 /// Bit scan reverse (stores index of first encountered 1 from the back).
 pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];

+/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
+
+/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
+
 /// Call near, relative, displacement relative to next instruction (sign-extended).
 pub static CALL_RELATIVE: [u8; 1] = [0xe8];

@@ -335,6 +343,10 @@ pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
 /// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
 pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];

+/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
+/// in XMM0 and store the values into xmm1 (SSE4.1).
+pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
+
 /// Compare packed data for equal (SSE2).
 pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];

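A standalone model of the PBLENDVB selection rule described in the doc comment above (Intel's semantics as summarized there, restated here as an assumption): each result byte is taken from the second source when bit 7 of the corresponding mask byte in XMM0 is set, and left as the destination byte otherwise.

fn pblendvb(dst: [u8; 16], src: [u8; 16], mask_xmm0: [u8; 16]) -> [u8; 16] {
    let mut out = dst;
    for i in 0..16 {
        // Only the high bit of each mask byte participates in the selection.
        if mask_xmm0[i] & 0x80 != 0 {
            out[i] = src[i];
        }
    }
    out
}

fn main() {
    let dst = [0u8; 16];
    let src = [0xAAu8; 16];
    let mut mask = [0u8; 16];
    mask[0] = 0x80; // select src for byte 0 only
    assert_eq!(pblendvb(dst, src, mask)[0], 0xAA);
    assert_eq!(pblendvb(dst, src, mask)[1], 0x00);
}
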
@@ -459,7 +471,11 @@ pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

 /// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
 /// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
-pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+
+/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
+/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
+pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];

 /// Pop top of stack into r{16,32,64}; increment stack pointer.
 pub static POP_REG: [u8; 1] = [0x58];

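Similarly, a standalone model of the PMULUDQ behaviour documented just above: only the low doubleword of each 64-bit lane is multiplied and the full 64-bit product is stored, so no overflow handling is needed (this mirrors the doc comment; it is not code from the patch).

fn pmuludq(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    // Truncate each lane to its low 32 bits, then widen the product back to 64 bits.
    let lane = |x: u64, y: u64| (x as u32 as u64) * (y as u32 as u64);
    [lane(a[0], b[0]), lane(a[1], b[1])]
}

fn main() {
    // The high halves are ignored: the lanes compute 2 * 3 and 5 * 7.
    let a = [0xFFFF_FFFF_0000_0002u64, 5];
    let b = [0xDEAD_BEEF_0000_0003u64, 7];
    assert_eq!(pmuludq(a, b), [6, 35]);
}
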
@ -427,6 +427,7 @@ pub(crate) fn define<'shared>(
|
|||
let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
|
||||
let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
|
||||
let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
|
||||
let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
|
||||
|
||||
// Stack operand with a 32-bit signed displacement from either RBP or RSP.
|
||||
let stack_gpr32 = Stack::new(gpr);
|
||||
|
@ -607,12 +608,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r with FPR ins and outs. A form with a byte immediate.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
|
||||
.operands_in(vec![fpr, fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -620,7 +621,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm:i64 = lane.into();
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -904,14 +905,32 @@ pub(crate) fn define<'shared>(
|
|||
.inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
|
||||
);
|
||||
|
||||
// XX /r for BLEND* instructions
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
|
||||
.operands_in(vec![
|
||||
OperandConstraint::FixedReg(reg_xmm0),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
])
|
||||
.operands_out(vec![2])
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
|
||||
modrm_rr(in_reg1, in_reg2, sink);
|
||||
"#,
|
||||
),
|
||||
"size_with_inferred_rex_for_inreg1_inreg2",
|
||||
);
|
||||
|
||||
// XX /n ib with 8-bit immediate sign-extended.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm,
|
||||
&*formats.binary_imm64,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -928,11 +947,11 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm,
|
||||
&*formats.binary_imm64,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -951,11 +970,11 @@ pub(crate) fn define<'shared>(
|
|||
// XX /n id with 32-bit immediate sign-extended.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm,
|
||||
&*formats.binary_imm64,
|
||||
"imm",
|
||||
32,
|
||||
0,
|
||||
|
@ -977,20 +996,20 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![fpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.extract_lane,
|
||||
"lane",
|
||||
&*formats.binary_imm8,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
)) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate?
|
||||
))
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
let imm:i64 = lane.into();
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1001,17 +1020,17 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![gpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.extract_lane, "lane", 8, 0,
|
||||
&*formats.binary_imm8, "imm", 8, 0,
|
||||
))
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
|
||||
modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
|
||||
let imm:i64 = lane.into();
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
), "size_with_inferred_rex_for_inreg0_outreg0"
|
||||
|
@ -1021,12 +1040,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
|
||||
.operands_in(vec![fpr, gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -1034,7 +1053,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm:i64 = lane.into();
|
||||
let imm: i64 = imm.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1432,23 +1451,7 @@ pub(crate) fn define<'shared>(
|
|||
// TODO Alternative forms for 8-bit immediates, when applicable.
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
let sp = StackRef::sp(stack_slot, &func.stack_slots);
|
||||
let base = stk_base(sp.base);
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, base), sink);
|
||||
modrm_sib_disp8(out_reg0, sink);
|
||||
sib_noindex(base, sink);
|
||||
let imm : i32 = offset.into();
|
||||
sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
|
||||
EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
|
@ -2871,12 +2874,12 @@ pub(crate) fn define<'shared>(
|
|||
|
||||
{
|
||||
let has_small_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
|
||||
|
||||
// XX /n, MI form with imm8.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2)
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_small_offset)
|
||||
|
@ -2894,12 +2897,12 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
let has_big_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
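The `new_is_signed_int(format, "imm", bits, scale)` predicates used throughout these recipes check that the instruction's immediate fits in the given number of signed bits, after requiring `scale` trailing zero bits. The sketch below shows only that arithmetic in plain Rust; the function and parameter names are illustrative, not the cranelift-codegen-meta API.

// Fits-in-N-signed-bits check, as expressed by the recipe predicates above
// (illustrative helper; the real predicate is generated from the meta language).
fn is_signed_int(value: i64, bits: u32, scale: u32) -> bool {
    let shifted = value >> scale;
    (shifted << scale) == value          // low `scale` bits must be zero
        && shifted >= -(1i64 << (bits - 1))
        && shifted < (1i64 << (bits - 1))
}

fn main() {
    assert!(is_signed_int(127, 8, 0));   // usable by the imm8 recipes
    assert!(!is_signed_int(128, 8, 0));  // falls back to the imm32 recipes
    assert!(is_signed_int(128, 32, 0));
}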
|
||||
|
||||
// XX /n, MI form with imm32.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5)
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_big_offset)
|
||||
|
|
|
@ -3,6 +3,12 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
|
|||
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
let mut settings = SettingGroupBuilder::new("x86");
|
||||
|
||||
settings.add_bool(
|
||||
"use_new_backend",
|
||||
"Whether to use the new codegen backend using the new isel",
|
||||
false,
|
||||
);
|
||||
|
||||
// CPUID.01H:ECX
|
||||
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
|
||||
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::rc::Rc;
|
|||
|
||||
pub(crate) struct Formats {
|
||||
pub(crate) binary: Rc<InstructionFormat>,
|
||||
pub(crate) binary_imm: Rc<InstructionFormat>,
|
||||
pub(crate) binary_imm64: Rc<InstructionFormat>,
|
||||
pub(crate) branch: Rc<InstructionFormat>,
|
||||
pub(crate) branch_float: Rc<InstructionFormat>,
|
||||
pub(crate) branch_icmp: Rc<InstructionFormat>,
|
||||
|
@ -17,14 +17,13 @@ pub(crate) struct Formats {
|
|||
pub(crate) cond_trap: Rc<InstructionFormat>,
|
||||
pub(crate) copy_special: Rc<InstructionFormat>,
|
||||
pub(crate) copy_to_ssa: Rc<InstructionFormat>,
|
||||
pub(crate) extract_lane: Rc<InstructionFormat>,
|
||||
pub(crate) binary_imm8: Rc<InstructionFormat>,
|
||||
pub(crate) float_compare: Rc<InstructionFormat>,
|
||||
pub(crate) float_cond: Rc<InstructionFormat>,
|
||||
pub(crate) float_cond_trap: Rc<InstructionFormat>,
|
||||
pub(crate) func_addr: Rc<InstructionFormat>,
|
||||
pub(crate) heap_addr: Rc<InstructionFormat>,
|
||||
pub(crate) indirect_jump: Rc<InstructionFormat>,
|
||||
pub(crate) insert_lane: Rc<InstructionFormat>,
|
||||
pub(crate) int_compare: Rc<InstructionFormat>,
|
||||
pub(crate) int_compare_imm: Rc<InstructionFormat>,
|
||||
pub(crate) int_cond: Rc<InstructionFormat>,
|
||||
|
@ -45,6 +44,7 @@ pub(crate) struct Formats {
|
|||
pub(crate) store_complex: Rc<InstructionFormat>,
|
||||
pub(crate) table_addr: Rc<InstructionFormat>,
|
||||
pub(crate) ternary: Rc<InstructionFormat>,
|
||||
pub(crate) ternary_imm8: Rc<InstructionFormat>,
|
||||
pub(crate) trap: Rc<InstructionFormat>,
|
||||
pub(crate) unary: Rc<InstructionFormat>,
|
||||
pub(crate) unary_bool: Rc<InstructionFormat>,
|
||||
|
@ -76,7 +76,9 @@ impl Formats {
|
|||
|
||||
binary: Builder::new("Binary").value().value().build(),
|
||||
|
||||
binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(),
|
||||
binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),
|
||||
|
||||
binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
|
||||
|
||||
// The select instructions are controlled by the second VALUE operand.
|
||||
// The first VALUE operand is the controlling flag which has a derived type.
|
||||
|
@ -88,23 +90,18 @@ impl Formats {
|
|||
.typevar_operand(1)
|
||||
.build(),
|
||||
|
||||
ternary_imm8: Builder::new("TernaryImm8")
|
||||
.value()
|
||||
.imm(&imm.uimm8)
|
||||
.value()
|
||||
.build(),
|
||||
|
||||
// Catch-all for instructions with many outputs and inputs and no immediate
|
||||
// operands.
|
||||
multiary: Builder::new("MultiAry").varargs().build(),
|
||||
|
||||
nullary: Builder::new("NullAry").build(),
|
||||
|
||||
insert_lane: Builder::new("InsertLane")
|
||||
.value()
|
||||
.imm_with_name("lane", &imm.uimm8)
|
||||
.value()
|
||||
.build(),
|
||||
|
||||
extract_lane: Builder::new("ExtractLane")
|
||||
.value()
|
||||
.imm_with_name("lane", &imm.uimm8)
|
||||
.build(),
|
||||
|
||||
shuffle: Builder::new("Shuffle")
|
||||
.value()
|
||||
.value()
|
||||
|
|
|
@ -559,9 +559,9 @@ fn define_simd_lane_access(
|
|||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.insert_lane,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx, y])
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
|
@ -579,7 +579,7 @@ fn define_simd_lane_access(
|
|||
may or may not be zeroed depending on the ISA but the type system should prevent using
|
||||
``a`` as anything other than the extracted value.
|
||||
"#,
|
||||
&formats.extract_lane,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -1172,6 +1172,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8",
|
||||
|
@ -1186,6 +1200,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I32x4 = &TypeVar::new(
|
||||
"I32x4",
|
||||
"A SIMD vector with exactly 4 lanes of 32-bit values",
|
||||
|
@ -1201,7 +1229,7 @@ pub(crate) fn define(
|
|||
Inst::new(
|
||||
"uload16x4",
|
||||
r#"
|
||||
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
vector.
|
||||
"#,
|
||||
&formats.load,
|
||||
|
@ -1211,6 +1239,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4",
|
||||
|
@ -1225,6 +1267,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector with exactly 2 lanes of 64-bit values",
|
||||
|
@ -1250,6 +1306,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2",
|
||||
|
@ -1264,6 +1334,20 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
|
||||
let a = &Operand::new("a", Mem).with_doc("Value loaded");
|
||||
let Offset =
|
||||
|
@ -2131,7 +2215,7 @@ pub(crate) fn define(
|
|||
Like `icmp_imm`, but returns integer CPU flags instead of testing
|
||||
a specific condition code.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![f]),
|
||||
|
@ -2181,7 +2265,7 @@ pub(crate) fn define(
|
|||
This is similar to `iadd` but the operands are interpreted as signed integers and their
|
||||
summed result, instead of wrapping, will be saturated to the lowest or highest
|
||||
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
|
||||
since an `iadd_ssat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
clamped to `0x7F`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
|
@ -2376,7 +2460,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2391,7 +2475,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2405,7 +2489,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2421,7 +2505,7 @@ pub(crate) fn define(
|
|||
representable in `B` bits two's complement. This only happens
|
||||
when `x = -2^{B-1}, Y = -1`.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2435,7 +2519,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2449,7 +2533,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2468,7 +2552,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2868,7 +2952,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2885,7 +2969,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2902,7 +2986,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2947,7 +3031,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate left by immediate.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2959,7 +3043,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate right by immediate.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3034,7 +3118,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of ``x``.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3048,7 +3132,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3062,7 +3146,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm,
|
||||
&formats.binary_imm64,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
|
|
@ -61,6 +61,7 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let cls = insts.by_name("cls");
|
||||
let clz = insts.by_name("clz");
|
||||
let ctz = insts.by_name("ctz");
|
||||
let copy = insts.by_name("copy");
|
||||
let fabs = insts.by_name("fabs");
|
||||
let f32const = insts.by_name("f32const");
|
||||
let f64const = insts.by_name("f64const");
|
||||
|
@ -198,8 +199,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let ah = var("ah");
|
||||
let cc = var("cc");
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
let ptr = var("ptr");
|
||||
let flags = var("flags");
|
||||
let offset = var("off");
|
||||
|
@ -212,8 +211,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
// embedded as part of arguments), so use a custom legalization for now.
|
||||
narrow.custom_legalize(iconst, "narrow_iconst");
|
||||
|
||||
{
|
||||
let inst = uextend.bind(I128).bind(I64);
|
||||
for &(ty, ty_half) in &[(I128, I64), (I64, I32)] {
|
||||
let inst = uextend.bind(ty).bind(ty_half);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
|
@ -223,12 +222,12 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
{
|
||||
let inst = sextend.bind(I128).bind(I64);
|
||||
for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] {
|
||||
let inst = sextend.bind(ty).bind(ty_half);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, 63))), // splat sign bit to whole number
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number
|
||||
def!(a = iconcat(x, ah)),
|
||||
],
|
||||
);
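The two loops above legalize `uextend` and `sextend` to double-width integers by concatenating a computed high half: zero for `uextend`, and the sign bit splatted across the half-width (the `sshr_imm` by width minus one) for `sextend`. A scalar analogue in plain Rust, for illustration only, not Cranelift IR:

// Scalar analogue of the extension legalizations above.
fn sextend_i64_to_i128(x: i64) -> i128 {
    let high = x >> 63; // arithmetic shift: all ones if negative, all zeros otherwise
    ((high as i128) << 64) | (x as u64 as i128)
}

fn uextend_i64_to_i128(x: i64) -> i128 {
    (x as u64) as i128 // high half is simply zero
}

fn main() {
    assert_eq!(sextend_i64_to_i128(-1), -1i128);
    assert_eq!(sextend_i64_to_i128(5), 5i128);
    assert_eq!(uextend_i64_to_i128(-1), u64::MAX as i128);
}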
|
||||
|
@ -268,39 +267,45 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brz.I128(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
for &ty in &[I128, I64] {
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
|
||||
narrow.legalize(
|
||||
def!(brnz.I128(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
narrow.legalize(
|
||||
def!(brz.ty(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brnz.ty(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
}
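Both branch legalizations above work on the two halves produced by `isplit`: `brz` on a wide value branches only if the low and high halves are both zero (hence the two `icmp_imm ... eq 0` combined with `band`), while `brnz` branches if either half is non-zero. A scalar analogue, for illustration only:

// Scalar analogue of the brz/brnz legalizations above.
fn brz_would_branch(lo: u64, hi: u64) -> bool {
    // The IR computes this as band(lo == 0, hi == 0).
    lo == 0 && hi == 0
}

fn brnz_would_branch(lo: u64, hi: u64) -> bool {
    lo != 0 || hi != 0
}

fn main() {
    assert!(brz_would_branch(0, 0));
    assert!(!brz_would_branch(1, 0));
    assert!(brnz_would_branch(0, 7));
}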
|
||||
|
||||
narrow.legalize(
|
||||
def!(a = popcnt.I128(x)),
|
||||
|
@ -629,6 +634,14 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
for &(ty_half, ty) in &[(I64, I128), (I32, I64)] {
|
||||
let inst = ireduce.bind(ty_half).bind(ty);
|
||||
expand.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![def!((b, c) = isplit(x)), def!(a = copy(b))],
|
||||
);
|
||||
}
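The `ireduce` legalization above simply keeps the low half of the `isplit` result. A scalar analogue, for illustration only:

// Scalar analogue of the ireduce legalization: reduce i128 -> i64 by keeping the low half.
fn ireduce_i128_to_i64(x: i128) -> i64 {
    let (lo, _hi) = (x as u64, (x >> 64) as u64); // what isplit would produce
    lo as i64
}

fn main() {
    assert_eq!(ireduce_i128_to_i64(0x1_0000_0000_0000_0003), 3);
    assert_eq!(ireduce_i128_to_i64(-1), -1);
}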
|
||||
|
||||
// Expand integer operations with carry for RISC architectures that don't have
|
||||
// the flags.
|
||||
let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult");
|
||||
|
|
|
@ -25,11 +25,14 @@ pub(crate) fn define() -> SettingGroup {
|
|||
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
|
||||
time to allocate registers, but generated code's quality may be inferior. As of
|
||||
2020-04-17, it is still experimental and it should not be used in production settings.
|
||||
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
|
||||
checks that may take some time to run, and thus these checks are disabled by default.
|
||||
"#,
|
||||
vec![
|
||||
"backtracking",
|
||||
"backtracking_checked",
|
||||
"experimental_linear_scan",
|
||||
"experimental_linear_scan_checked",
|
||||
],
|
||||
);
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"702a281a26cf7099e1b3ca5e8bea145c113f52242be4f1e7e5b06bf129092599","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
|
||||
{"files":{"Cargo.toml":"d3026bf5426d767b0b23f0a4f6272aaeb68f598a92f6c788c1f6948153fa63c3","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-codegen-shared"
|
||||
version = "0.63.0"
|
||||
version = "0.64.0"
|
||||
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-codegen"
|
||||
version = "0.63.0"
|
||||
version = "0.64.0"
|
||||
description = "Low-level code generator library"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-codegen"
|
||||
|
@ -13,25 +13,27 @@ build = "build.rs"
|
|||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cranelift-codegen-shared = { path = "./shared", version = "0.63.0" }
|
||||
cranelift-entity = { path = "../entity", version = "0.63.0" }
|
||||
cranelift-bforest = { path = "../bforest", version = "0.63.0" }
|
||||
cranelift-codegen-shared = { path = "./shared", version = "0.64.0" }
|
||||
cranelift-entity = { path = "../entity", version = "0.64.0" }
|
||||
cranelift-bforest = { path = "../bforest", version = "0.64.0" }
|
||||
hashbrown = { version = "0.7", optional = true }
|
||||
target-lexicon = "0.10"
|
||||
log = { version = "0.4.6", default-features = false }
|
||||
serde = { version = "1.0.94", features = ["derive"], optional = true }
|
||||
gimli = { version = "0.20.0", default-features = false, features = ["write"], optional = true }
|
||||
gimli = { version = "0.21.0", default-features = false, features = ["write"], optional = true }
|
||||
smallvec = { version = "1.0.0" }
|
||||
thiserror = "1.0.4"
|
||||
byteorder = { version = "1.3.2", default-features = false }
|
||||
regalloc = "0.0.21"
|
||||
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.1.0" }
|
||||
regalloc = "0.0.25"
|
||||
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
|
||||
# Please don't add any unless they are essential to the task of creating binary
|
||||
# machine code. Integration tests that need external dependencies can be
|
||||
# accomodated in `tests`.
|
||||
|
||||
[build-dependencies]
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.63.0" }
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.64.0" }
|
||||
peepmatic = { path = "../peepmatic", optional = true, version = "0.64.0" }
|
||||
|
||||
[features]
|
||||
default = ["std", "unwind"]
|
||||
|
@ -58,10 +60,12 @@ x86 = []
|
|||
arm32 = []
|
||||
arm64 = []
|
||||
riscv = []
|
||||
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
|
||||
|
||||
# Option to enable all architectures.
|
||||
all-arch = [
|
||||
"x86",
|
||||
"x64",
|
||||
"arm32",
|
||||
"arm64",
|
||||
"riscv"
|
||||
|
@ -70,5 +74,12 @@ all-arch = [
|
|||
# For dependent crates that want to serialize some parts of cranelift
|
||||
enable-serde = ["serde"]
|
||||
|
||||
# Recompile our optimizations that are written in the `peepmatic` DSL into a
|
||||
# compact finite-state transducer automaton.
|
||||
rebuild-peephole-optimizers = ["peepmatic"]
|
||||
|
||||
# Enable the use of `peepmatic`-generated peephole optimizers.
|
||||
enable-peepmatic = ["peepmatic-runtime"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
|
|
@ -71,4 +71,22 @@ fn main() {
|
|||
);
|
||||
println!("cargo:warning=Generated files are in {}", out_dir);
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-peephole-optimizers")]
|
||||
rebuild_peephole_optimizers();
|
||||
}
|
||||
|
||||
#[cfg(feature = "rebuild-peephole-optimizers")]
|
||||
fn rebuild_peephole_optimizers() {
|
||||
use std::path::Path;
|
||||
|
||||
let source_path = Path::new("src").join("preopt.peepmatic");
|
||||
println!("cargo:rerun-if-changed={}", source_path.display());
|
||||
|
||||
let preopt =
|
||||
peepmatic::compile_file(&source_path).expect("failed to compile `src/preopt.peepmatic`");
|
||||
|
||||
preopt
|
||||
.serialize_to_file(&Path::new("src").join("preopt.serialized"))
|
||||
.expect("failed to serialize peephole optimizer to `src/preopt.serialized`");
|
||||
}
|
||||
|
|
|
@ -54,6 +54,9 @@ pub enum ValueConversion {
|
|||
|
||||
/// Unsigned zero-extend value to the required type.
|
||||
Uext(Type),
|
||||
|
||||
/// Pass value by pointer of given integer type.
|
||||
Pointer(Type),
|
||||
}
|
||||
|
||||
impl ValueConversion {
|
||||
|
@ -63,7 +66,7 @@ impl ValueConversion {
|
|||
Self::IntSplit => ty.half_width().expect("Integer type too small to split"),
|
||||
Self::VectorSplit => ty.half_vector().expect("Not a vector"),
|
||||
Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
|
||||
Self::Sext(nty) | Self::Uext(nty) => nty,
|
||||
Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,6 +77,11 @@ impl ValueConversion {
|
|||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a conversion to pointer?
|
||||
pub fn is_pointer(self) -> bool {
|
||||
matches!(self, Self::Pointer(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
|
||||
|
@ -110,10 +118,16 @@ pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<
|
|||
}
|
||||
// Split this argument into two smaller ones. Then revisit both.
|
||||
ArgAction::Convert(conv) => {
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"No more conversions allowed after conversion to pointer"
|
||||
);
|
||||
let value_type = conv.apply(arg.value_type);
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
args.to_mut()[argno].value_type = value_type;
|
||||
if conv.is_split() {
|
||||
if conv.is_pointer() {
|
||||
args.to_mut()[argno].legalized_to_pointer = true;
|
||||
} else if conv.is_split() {
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
args.to_mut().insert(argno + 1, new_arg);
|
||||
}
|
||||
}
|
||||
|
@ -152,6 +166,10 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
|||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
if arg.legalized_to_pointer {
|
||||
return ValueConversion::Pointer(arg.value_type);
|
||||
}
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
|
@ -226,5 +244,12 @@ mod tests {
|
|||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::IntBits
|
||||
);
|
||||
|
||||
// Value is passed by reference
|
||||
arg.legalized_to_pointer = true;
|
||||
assert_eq!(
|
||||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::Pointer(types::I32)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,8 @@ const NUM_BITS: usize = core::mem::size_of::<Num>() * 8;
|
|||
/// The first value in the bitmap is of the lowest addressed slot on the stack.
|
||||
/// As all stacks in ISAs supported by Cranelift grow down, this means that
|
||||
/// first value is of the top of the stack and values proceed down the stack.
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))]
|
||||
pub struct Stackmap {
|
||||
bitmap: Vec<BitSet<Num>>,
|
||||
mapped_words: u32,
|
||||
|
|
|
@ -5,12 +5,14 @@
|
|||
//!
|
||||
//! If you would like to add support for larger bitsets in the future, you need to change the trait
|
||||
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
|
||||
|
||||
use core::convert::{From, Into};
|
||||
use core::mem::size_of;
|
||||
use core::ops::{Add, BitOr, Shl, Sub};
|
||||
|
||||
/// A small bitset built on a single primitive integer type
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
pub struct BitSet<T>(pub T);
|
||||
|
||||
impl<T> BitSet<T>
|
||||
|
|
|
@ -27,6 +27,7 @@ use crate::nan_canonicalization::do_nan_canonicalization;
|
|||
use crate::postopt::do_postopt;
|
||||
use crate::redundant_reload_remover::RedundantReloadRemover;
|
||||
use crate::regalloc;
|
||||
use crate::remove_constant_phis::do_remove_constant_phis;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{FlagsOrIsa, OptLevel};
|
||||
use crate::simple_gvn::do_simple_gvn;
|
||||
|
@ -179,6 +180,8 @@ impl Context {
|
|||
self.dce(isa)?;
|
||||
}
|
||||
|
||||
self.remove_constant_phis(isa)?;
|
||||
|
||||
if let Some(backend) = isa.get_mach_backend() {
|
||||
let result = backend.compile_function(&self.func, self.want_disasm)?;
|
||||
let info = result.code_info();
|
||||
|
@ -224,7 +227,7 @@ impl Context {
|
|||
let _tt = timing::binemit();
|
||||
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
|
||||
if let Some(ref result) = &self.mach_compile_result {
|
||||
result.sections.emit(&mut sink);
|
||||
result.buffer.emit(&mut sink);
|
||||
} else {
|
||||
isa.emit_function_to_memory(&self.func, &mut sink);
|
||||
}
|
||||
|
@ -292,6 +295,16 @@ impl Context {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform constant-phi removal on the function.
|
||||
pub fn remove_constant_phis<'a, FOI: Into<FlagsOrIsa<'a>>>(
|
||||
&mut self,
|
||||
fisa: FOI,
|
||||
) -> CodegenResult<()> {
|
||||
do_remove_constant_phis(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform pre-legalization rewrites on the function.
|
||||
pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
do_preopt(&mut self.func, &mut self.cfg, isa);
|
||||
|
|
|
@ -794,15 +794,20 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
|
|||
if !self.srcloc.is_default() {
|
||||
self.func.srclocs[inst] = self.srcloc;
|
||||
}
|
||||
// Assign an encoding.
|
||||
// XXX Is there a way to describe this error to the user?
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
|
||||
match self
|
||||
.isa
|
||||
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
|
||||
{
|
||||
Ok(e) => self.func.encodings[inst] = e,
|
||||
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
|
||||
|
||||
// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
|
||||
// during lowering.
|
||||
if self.isa.get_mach_backend().is_none() {
|
||||
// Assign an encoding.
|
||||
// XXX Is there a way to describe this error to the user?
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
|
||||
match self
|
||||
.isa
|
||||
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
|
||||
{
|
||||
Ok(e) => self.func.encodings[inst] = e,
|
||||
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
|
||||
}
|
||||
}
|
||||
|
||||
&mut self.func.dfg
|
||||
|
|
|
@ -40,3 +40,24 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
|
|||
let opcode = data.opcode();
|
||||
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
|
||||
}
|
||||
|
||||
/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load?
|
||||
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
|
||||
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
|
||||
}
|
||||
|
||||
/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
|
||||
/// represented in 64 bits?
|
||||
pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
|
||||
let data = &func.dfg[inst];
|
||||
if data.opcode() == Opcode::Null {
|
||||
return Some(0);
|
||||
}
|
||||
match data {
|
||||
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
|
||||
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
|
||||
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
|
||||
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
|
||||
_ => None,
|
||||
}
|
||||
}
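The 64-bit payloads returned above are plain bit patterns: integer immediates are reinterpreted as-is, floats contribute their IEEE bit patterns, and booleans become 0 or 1. A minimal illustration in plain Rust:

// Illustration only: the u64 payloads used above are bit patterns, not numeric conversions.
fn main() {
    assert_eq!(1.0f32.to_bits() as u64, 0x3f80_0000);            // UnaryIeee32 case
    assert_eq!(1.0f64.to_bits(), 0x3ff0_0000_0000_0000);         // UnaryIeee64 case
    let b = true;
    assert_eq!(if b { 1u64 } else { 0 }, 1);                     // UnaryBool case
}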
|
||||
|
|
|
@ -234,11 +234,7 @@ impl DataFlowGraph {
|
|||
|
||||
/// Get the type of a value.
|
||||
pub fn value_type(&self, v: Value) -> Type {
|
||||
match self.values[v] {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
self.values[v].ty()
|
||||
}
|
||||
|
||||
/// Get the definition of a value.
|
||||
|
@ -383,9 +379,14 @@ pub enum ValueDef {
|
|||
impl ValueDef {
|
||||
/// Unwrap the instruction where the value was defined, or panic.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().expect("Value is not an instruction result")
|
||||
}
|
||||
|
||||
/// Get the instruction where the value was defined, if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Result(inst, _) => inst,
|
||||
_ => panic!("Value is not an instruction result"),
|
||||
Self::Result(inst, _) => Some(inst),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -428,6 +429,16 @@ enum ValueData {
|
|||
Alias { ty: Type, original: Value },
|
||||
}
|
||||
|
||||
impl ValueData {
|
||||
fn ty(&self) -> Type {
|
||||
match *self {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Instructions.
|
||||
///
|
||||
impl DataFlowGraph {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
|
||||
use crate::isa::{CallConv, RegInfo, RegUnit};
|
||||
use crate::machinst::RelocDistance;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt;
|
||||
use core::str::FromStr;
|
||||
|
@ -155,6 +156,8 @@ pub struct AbiParam {
|
|||
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
|
||||
/// been legalized.
|
||||
pub location: ArgumentLoc,
|
||||
/// Was the argument converted to pointer during legalization?
|
||||
pub legalized_to_pointer: bool,
|
||||
}
|
||||
|
||||
impl AbiParam {
|
||||
|
@ -165,6 +168,7 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose: ArgumentPurpose::Normal,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,6 +179,7 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,6 +190,7 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: ArgumentLoc::Reg(regunit),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -218,6 +224,9 @@ pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
|
|||
impl<'a> fmt::Display for DisplayAbiParam<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0.value_type)?;
|
||||
if self.0.legalized_to_pointer {
|
||||
write!(f, " ptr")?;
|
||||
}
|
||||
match self.0.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => write!(f, " uext")?,
|
||||
|
@ -366,6 +375,16 @@ pub struct ExtFuncData {
|
|||
/// Will this function be defined nearby, such that it will always be a certain distance away,
|
||||
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
|
||||
/// symbols meant to be preemptible cannot be considered colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range. The exact
|
||||
/// distance depends on the code model in use. Currently on AArch64, for example, Cranelift
|
||||
/// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how
|
||||
/// far away the target will be, it is best not to set the `colocated` flag; in general, this
|
||||
/// flag is best used when the target is known to be in the same unit of code generation, such
|
||||
/// as a Wasm module.
|
||||
///
|
||||
/// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A
|
||||
/// `colocated` flag value of `true` implies `RelocDistance::Near`.
|
||||
pub colocated: bool,
|
||||
}
|
||||
|
||||
|
@ -378,6 +397,17 @@ impl fmt::Display for ExtFuncData {
|
|||
}
|
||||
}
|
||||
|
||||
impl ExtFuncData {
|
||||
/// Return an estimate of the distance to the referred-to function symbol.
|
||||
pub fn reloc_distance(&self) -> RelocDistance {
|
||||
if self.colocated {
|
||||
RelocDistance::Near
|
||||
} else {
|
||||
RelocDistance::Far
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -393,6 +423,8 @@ mod tests {
|
|||
assert_eq!(t.sext().to_string(), "i32 sext");
|
||||
t.purpose = ArgumentPurpose::StructReturn;
|
||||
assert_eq!(t.to_string(), "i32 uext sret");
|
||||
t.legalized_to_pointer = true;
|
||||
assert_eq!(t.to_string(), "i32 ptr uext sret");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -308,6 +308,30 @@ impl Function {
|
|||
// function, assume it is not a leaf.
|
||||
self.dfg.signatures.is_empty()
|
||||
}
|
||||
|
||||
/// Replace the `dst` instruction's data with the `src` instruction's data
|
||||
/// and then remove `src`.
|
||||
///
|
||||
/// `src` and its result values should not be used at all, as any uses would
|
||||
/// be left dangling after calling this method.
|
||||
///
|
||||
/// `src` and `dst` must have the same number of resulting values, and
|
||||
/// `src`'s i^th value must have the same type as `dst`'s i^th value.
|
||||
pub fn transplant_inst(&mut self, dst: Inst, src: Inst) {
|
||||
debug_assert_eq!(
|
||||
self.dfg.inst_results(dst).len(),
|
||||
self.dfg.inst_results(src).len()
|
||||
);
|
||||
debug_assert!(self
|
||||
.dfg
|
||||
.inst_results(dst)
|
||||
.iter()
|
||||
.zip(self.dfg.inst_results(src))
|
||||
.all(|(a, b)| self.dfg.value_type(*a) == self.dfg.value_type(*b)));
|
||||
|
||||
self.dfg[dst] = self.dfg[src].clone();
|
||||
self.layout.remove_inst(src);
|
||||
}
|
||||
}
|
||||
|
||||
/// Additional annotations for function display.
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
use crate::ir::immediates::{Imm64, Offset32};
|
||||
use crate::ir::{ExternalName, GlobalValue, Type};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::machinst::RelocDistance;
|
||||
use core::fmt;
|
||||
|
||||
/// Information about a global value declaration.
|
||||
|
@ -62,6 +63,10 @@ pub enum GlobalValueData {
|
|||
/// Will this symbol be defined nearby, such that it will always be a certain distance
|
||||
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
|
||||
/// symbols meant to be preemptible cannot be colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range: for example,
|
||||
/// a +/- 2^27-byte range on AArch64. See the documentation for
|
||||
/// [`RelocDistance`](machinst::RelocDistance) for more details.
|
||||
colocated: bool,
|
||||
|
||||
/// Does this symbol refer to a thread local storage value?
|
||||
|
@ -85,6 +90,20 @@ impl GlobalValueData {
|
|||
Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this global references a symbol, return an estimate of the relocation distance,
|
||||
/// based on the `colocated` flag.
|
||||
pub fn maybe_reloc_distance(&self) -> Option<RelocDistance> {
|
||||
match self {
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: true, ..
|
||||
} => Some(RelocDistance::Near),
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: false, ..
|
||||
} => Some(RelocDistance::Far),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GlobalValueData {
|
||||
|
|
|
@ -62,6 +62,21 @@ impl Imm64 {
|
|||
pub fn bits(&self) -> i64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Sign extend this immediate as if it were a signed integer of the given
|
||||
/// power-of-two width.
|
||||
pub fn sign_extend_from_width(&mut self, bit_width: u16) {
|
||||
debug_assert!(bit_width.is_power_of_two());
|
||||
|
||||
if bit_width >= 64 {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = bit_width as i64;
|
||||
let delta = 64 - bit_width;
|
||||
let sign_extended = (self.0 << delta) >> delta;
|
||||
*self = Imm64(sign_extended);
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<i64> for Imm64 {
|
||||
|
|
|
@ -11,9 +11,7 @@ use core::fmt::{self, Display, Formatter};
|
|||
use core::ops::{Deref, DerefMut};
|
||||
use core::str::FromStr;
|
||||
|
||||
use crate::ir;
|
||||
use crate::ir::types;
|
||||
use crate::ir::{Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::isa;
|
||||
|
||||
use crate::bitset::BitSet;
|
||||
|
@ -257,6 +255,30 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get its trap code. Otherwise, return
|
||||
/// `None`.
|
||||
pub fn trap_code(&self) -> Option<TrapCode> {
|
||||
match *self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get an exclusive reference to its
|
||||
/// trap code. Otherwise, return `None`.
|
||||
pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> {
|
||||
match self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return information about a call instruction.
|
||||
///
|
||||
/// Any instruction that can call another function reveals its call signature here.
|
||||
|
@ -274,6 +296,39 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn sign_extend_immediates(&mut self, ctrl_typevar: Type) {
|
||||
if ctrl_typevar.is_invalid() {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = ctrl_typevar.bits();
|
||||
|
||||
match self {
|
||||
Self::BinaryImm64 {
|
||||
opcode,
|
||||
arg: _,
|
||||
imm,
|
||||
} => {
|
||||
if matches!(opcode, Opcode::SdivImm | Opcode::SremImm) {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
Self::IntCompareImm {
|
||||
opcode,
|
||||
arg: _,
|
||||
cond,
|
||||
imm,
|
||||
} => {
|
||||
debug_assert_eq!(*opcode, Opcode::IcmpImm);
|
||||
if cond.unsigned() != *cond {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about branch and jump instructions.
|
||||
|
|
|
@ -24,6 +24,20 @@ pub enum LibCall {
|
|||
/// probe for stack overflow. These are emitted for functions which need
|
||||
/// it when the `enable_probestack` setting is true.
|
||||
Probestack,
|
||||
/// udiv.i64
|
||||
UdivI64,
|
||||
/// sdiv.i64
|
||||
SdivI64,
|
||||
/// urem.i64
|
||||
UremI64,
|
||||
/// srem.i64
|
||||
SremI64,
|
||||
/// ishl.i64
|
||||
IshlI64,
|
||||
/// ushr.i64
|
||||
UshrI64,
|
||||
/// sshr.i64
|
||||
SshrI64,
|
||||
/// ceil.f32
|
||||
CeilF32,
|
||||
/// ceil.f64
|
||||
|
@ -63,6 +77,13 @@ impl FromStr for LibCall {
|
|||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"Probestack" => Ok(Self::Probestack),
|
||||
"UdivI64" => Ok(Self::UdivI64),
|
||||
"SdivI64" => Ok(Self::SdivI64),
|
||||
"UremI64" => Ok(Self::UremI64),
|
||||
"SremI64" => Ok(Self::SremI64),
|
||||
"IshlI64" => Ok(Self::IshlI64),
|
||||
"UshrI64" => Ok(Self::UshrI64),
|
||||
"SshrI64" => Ok(Self::SshrI64),
|
||||
"CeilF32" => Ok(Self::CeilF32),
|
||||
"CeilF64" => Ok(Self::CeilF64),
|
||||
"FloorF32" => Ok(Self::FloorF32),
|
||||
|
@ -88,6 +109,16 @@ impl LibCall {
|
|||
/// Returns `None` if no well-known library routine name exists for that instruction.
|
||||
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
|
||||
Some(match ctrl_type {
|
||||
types::I64 => match opcode {
|
||||
Opcode::Udiv => Self::UdivI64,
|
||||
Opcode::Sdiv => Self::SdivI64,
|
||||
Opcode::Urem => Self::UremI64,
|
||||
Opcode::Srem => Self::SremI64,
|
||||
Opcode::Ishl => Self::IshlI64,
|
||||
Opcode::Ushr => Self::UshrI64,
|
||||
Opcode::Sshr => Self::SshrI64,
|
||||
_ => return None,
|
||||
},
|
||||
types::F32 => match opcode {
|
||||
Opcode::Ceil => Self::CeilF32,
|
||||
Opcode::Floor => Self::FloorF32,
|
||||
|
|
|
@ -27,9 +27,6 @@ pub enum TrapCode {
|
|||
/// A `table_addr` instruction detected an out-of-bounds error.
|
||||
TableOutOfBounds,
|
||||
|
||||
/// Other bounds checking error.
|
||||
OutOfBounds,
|
||||
|
||||
/// Indirect call to a null table entry.
|
||||
IndirectCallToNull,
|
||||
|
||||
|
@ -63,7 +60,6 @@ impl Display for TrapCode {
|
|||
StackOverflow => "stk_ovf",
|
||||
HeapOutOfBounds => "heap_oob",
|
||||
TableOutOfBounds => "table_oob",
|
||||
OutOfBounds => "oob",
|
||||
IndirectCallToNull => "icall_null",
|
||||
BadSignature => "bad_sig",
|
||||
IntegerOverflow => "int_ovf",
|
||||
|
@ -86,7 +82,6 @@ impl FromStr for TrapCode {
|
|||
"stk_ovf" => Ok(StackOverflow),
|
||||
"heap_oob" => Ok(HeapOutOfBounds),
|
||||
"table_oob" => Ok(TableOutOfBounds),
|
||||
"oob" => Ok(OutOfBounds),
|
||||
"icall_null" => Ok(IndirectCallToNull),
|
||||
"bad_sig" => Ok(BadSignature),
|
||||
"int_ovf" => Ok(IntegerOverflow),
|
||||
|
@ -106,11 +101,10 @@ mod tests {
|
|||
use alloc::string::ToString;
|
||||
|
||||
// Everything but user-defined codes.
|
||||
const CODES: [TrapCode; 11] = [
|
||||
const CODES: [TrapCode; 10] = [
|
||||
TrapCode::StackOverflow,
|
||||
TrapCode::HeapOutOfBounds,
|
||||
TrapCode::TableOutOfBounds,
|
||||
TrapCode::OutOfBounds,
|
||||
TrapCode::IndirectCallToNull,
|
||||
TrapCode::BadSignature,
|
||||
TrapCode::IntegerOverflow,
|
||||
|
|
File diff not shown because it is too large
|
@ -3,14 +3,14 @@
|
|||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
use crate::machinst::MachLabel;
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, Writable};
|
||||
|
||||
use core::convert::{Into, TryFrom};
|
||||
use core::convert::Into;
|
||||
use std::string::String;
|
||||
|
||||
/// A shift operator for a register or immediate.
|
||||
|
@ -112,7 +112,9 @@ pub enum MemLabel {
|
|||
/// A memory argument to load/store, encapsulating the possible addressing modes.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum MemArg {
|
||||
Label(MemLabel),
|
||||
//
|
||||
// Real ARM64 addressing modes:
|
||||
//
|
||||
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
|
||||
PostIndexed(Writable<Reg>, SImm9),
|
||||
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
|
||||
|
@ -137,11 +139,35 @@ pub enum MemArg {
|
|||
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
|
||||
UnsignedOffset(Reg, UImm12Scaled),
|
||||
|
||||
/// Offset from the stack pointer. Lowered into a real amode at emission.
|
||||
SPOffset(i64),
|
||||
//
|
||||
// virtual addressing modes that are lowered at emission time:
|
||||
//
|
||||
/// Reference to a "label": e.g., a symbol.
|
||||
Label(MemLabel),
|
||||
|
||||
/// Offset from the frame pointer. Lowered into a real amode at emission.
|
||||
FPOffset(i64),
|
||||
/// Arbitrary offset from a register. Converted to generation of large
|
||||
/// offsets with multiple instructions as necessary during code emission.
|
||||
RegOffset(Reg, i64, Type),
|
||||
|
||||
/// Offset from the stack pointer.
|
||||
SPOffset(i64, Type),
|
||||
|
||||
/// Offset from the frame pointer.
|
||||
FPOffset(i64, Type),
|
||||
|
||||
/// Offset from the "nominal stack pointer", which is where the real SP is
|
||||
/// just after stack and spill slots are allocated in the function prologue.
|
||||
/// At emission time, this is converted to `SPOffset` with a fixup added to
|
||||
/// the offset constant. The fixup is a running value that is tracked as
|
||||
/// emission iterates through instructions in linear order, and can be
|
||||
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
|
||||
///
|
||||
/// The standard ABI is in charge of handling this (by emitting the
|
||||
/// adjustment meta-instructions). It maintains the invariant that "nominal
|
||||
/// SP" is where the actual SP is after the function prologue and before
|
||||
/// clobber pushes. See the diagram in the documentation for
|
||||
/// [crate::isa::aarch64::abi](the ABI module) for more details.
|
||||
NominalSPOffset(i64, Type),
|
||||
}
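Resolving a `NominalSPOffset` access at emission time amounts to adding the running virtual-SP adjustment to the nominal offset. The sketch below illustrates only that bookkeeping; the type and field names are placeholders, not the actual emitter state.

// Illustration of the nominal-SP fixup described above (not the real emitter).
struct EmitState {
    virtual_sp_offset: i64, // adjusted by VirtualSPOffsetAdj pseudo-instructions
}

impl EmitState {
    fn real_sp_offset(&self, nominal_offset: i64) -> i64 {
        nominal_offset + self.virtual_sp_offset
    }
}

fn main() {
    let mut state = EmitState { virtual_sp_offset: 0 };
    state.virtual_sp_offset += 16; // e.g. SP moved down by a 16-byte clobber push
    assert_eq!(state.real_sp_offset(8), 24);
}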
|
||||
|
||||
impl MemArg {
|
||||
|
@ -152,17 +178,6 @@ impl MemArg {
|
|||
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
|
||||
}
|
||||
|
||||
/// Memory reference using an address in a register and an offset, if possible.
|
||||
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
|
||||
Some(MemArg::Unscaled(reg, simm9))
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
|
||||
Some(MemArg::UnsignedOffset(reg, uimm12s))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory reference using the sum of two registers as an address.
|
||||
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
|
||||
MemArg::RegReg(reg1, reg2)
|
||||
|
@ -281,78 +296,44 @@ impl CondBrKind {
|
|||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a BlockIndex, as passed into
|
||||
/// An unresolved reference to a Label, as passed into
|
||||
/// `lower_branch_group()`.
|
||||
Block(BlockIndex),
|
||||
/// A resolved reference to another instruction, after
|
||||
/// `Inst::with_block_offsets()`.
|
||||
ResolvedOffset(isize),
|
||||
Label(MachLabel),
|
||||
/// A fixed PC offset.
|
||||
ResolvedOffset(i32),
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Lower the branch target given offsets of each block.
|
||||
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
|
||||
/// Return the target's label, if it is a label-based target.
|
||||
pub fn as_label(self) -> Option<MachLabel> {
|
||||
match self {
|
||||
&mut BranchTarget::Block(bix) => {
|
||||
let bix = usize::try_from(bix).unwrap();
|
||||
assert!(bix < targets.len());
|
||||
let block_offset_in_func = targets[bix];
|
||||
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
|
||||
*self = BranchTarget::ResolvedOffset(branch_offset);
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(..) => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the block index.
|
||||
pub fn as_block_index(&self) -> Option<BlockIndex> {
|
||||
match self {
|
||||
&BranchTarget::Block(bix) => Some(bix),
|
||||
BranchTarget::Label(l) => Some(l),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as 4-byte words. Returns `0` if not
|
||||
/// yet resolved (in that case, we're only computing
|
||||
/// size and the offset doesn't matter).
|
||||
pub fn as_offset_words(&self) -> isize {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset19_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
assert!(off <= 0x3ffff);
|
||||
assert!(off >= -0x40000);
|
||||
(off as u32) & 0x7ffff
|
||||
}
|
||||
|
||||
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
|
||||
pub fn as_off26(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 25)) && (off >= -(1 << 25)) {
|
||||
Some((off as u32) & ((1 << 26) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a 19-bit offset, or `None` if overflow.
|
||||
pub fn as_off19(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 18)) && (off >= -(1 << 18)) {
|
||||
Some((off as u32) & ((1 << 19) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Map the block index given a transform map.
|
||||
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
|
||||
match self {
|
||||
&mut BranchTarget::Block(ref mut bix) => {
|
||||
let n = block_index_map[usize::try_from(*bix).unwrap()];
|
||||
*bix = n;
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(_) => {}
|
||||
}
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset26_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x1ffffff);
|
||||
assert!(off >= -0x2000000);
|
||||
(off as u32) & 0x3ffffff
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -443,8 +424,11 @@ impl ShowWithRRU for MemArg {
|
|||
simm9.show_rru(mb_rru)
|
||||
),
|
||||
// Eliminated by `mem_finalize()`.
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Unexpected stack-offset mem-arg mode!")
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..)
|
||||
| &MemArg::RegOffset(..) => {
|
||||
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -485,18 +469,21 @@ impl ShowWithRRU for Cond {
|
|||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&BranchTarget::Block(block) => format!("block{}", block),
|
||||
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
|
||||
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
|
||||
/// 64-bit variants of many instructions (and integer registers).
|
||||
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
|
||||
/// variants of vector instructions.
|
||||
/// TODO: Create a separate type for SIMD & floating-point operands.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum InstSize {
|
||||
Size32,
|
||||
Size64,
|
||||
Size128,
|
||||
}
|
||||
|
||||
impl InstSize {
|
||||
|
@ -519,11 +506,13 @@ impl InstSize {
|
|||
/// Convert from a needed width to the smallest size that fits.
|
||||
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
|
||||
let bits: usize = bits.into();
|
||||
assert!(bits <= 64);
|
||||
assert!(bits <= 128);
|
||||
if bits <= 32 {
|
||||
InstSize::Size32
|
||||
} else {
|
||||
} else if bits <= 64 {
|
||||
InstSize::Size64
|
||||
} else {
|
||||
InstSize::Size128
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -532,11 +521,12 @@ impl InstSize {
|
|||
Self::from_bits(ty_bits(ty))
|
||||
}
|
||||
|
||||
/// Convert to I32 or I64.
|
||||
/// Convert to I32, I64, or I128.
|
||||
pub fn to_ty(self) -> Type {
|
||||
match self {
|
||||
InstSize::Size32 => I32,
|
||||
InstSize::Size64 => I64,
|
||||
InstSize::Size128 => I128,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -544,6 +534,9 @@ impl InstSize {
|
|||
match self {
|
||||
InstSize::Size32 => 0,
|
||||
InstSize::Size64 => 1,
|
||||
_ => {
|
||||
panic!("Unexpected size");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,12 +4,13 @@ use crate::binemit::{CodeOffset, Reloc};
|
|||
use crate::ir::constant::ConstantData;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::TrapCode;
|
||||
use crate::isa::aarch64::{inst::regs::PINNED_REG, inst::*};
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use log::debug;
|
||||
|
||||
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
|
||||
/// offset, possibly emitting relocation(s) as necessary.
|
||||
|
@ -23,43 +24,67 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
|
|||
/// generic arbitrary stack offset) into real addressing modes, possibly by
|
||||
/// emitting some helper instructions that come immediately before the use
|
||||
/// of this amode.
|
||||
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
|
||||
pub fn mem_finalize(
|
||||
insn_off: CodeOffset,
|
||||
mem: &MemArg,
|
||||
state: &EmitState,
|
||||
) -> (SmallVec<[Inst; 4]>, MemArg) {
|
||||
match mem {
|
||||
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => {
|
||||
&MemArg::RegOffset(_, off, ty)
|
||||
| &MemArg::SPOffset(off, ty)
|
||||
| &MemArg::FPOffset(off, ty)
|
||||
| &MemArg::NominalSPOffset(off, ty) => {
|
||||
let basereg = match mem {
|
||||
&MemArg::SPOffset(..) => stack_reg(),
|
||||
&MemArg::RegOffset(reg, _, _) => reg,
|
||||
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
|
||||
&MemArg::FPOffset(..) => fp_reg(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let adj = match mem {
|
||||
&MemArg::NominalSPOffset(..) => {
|
||||
debug!(
|
||||
"mem_finalize: nominal SP offset {} + adj {} -> {}",
|
||||
off,
|
||||
state.virtual_sp_offset,
|
||||
off + state.virtual_sp_offset
|
||||
);
|
||||
state.virtual_sp_offset
|
||||
}
|
||||
_ => 0,
|
||||
};
|
||||
let off = off + adj;
|
||||
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(off) {
|
||||
let mem = MemArg::Unscaled(basereg, simm9);
|
||||
(vec![], mem)
|
||||
(smallvec![], mem)
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
|
||||
let mem = MemArg::UnsignedOffset(basereg, uimm12s);
|
||||
(smallvec![], mem)
|
||||
} else {
|
||||
// In an addition, x31 is the zero register, not sp; we have only one temporary
|
||||
// so we can't do the proper add here.
|
||||
debug_assert_ne!(
|
||||
basereg,
|
||||
stack_reg(),
|
||||
"should have diverted SP before mem_finalize"
|
||||
);
|
||||
|
||||
let tmp = writable_spilltmp_reg();
|
||||
let mut const_insts = Inst::load_constant(tmp, off as u64);
|
||||
let add_inst = Inst::AluRRR {
|
||||
// N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
|
||||
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
|
||||
// is a valid base (for SPOffset) which we must handle here.
|
||||
// Also, SP needs to be the first arg, not second.
|
||||
let add_inst = Inst::AluRRRExtend {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: tmp,
|
||||
rn: tmp.to_reg(),
|
||||
rm: basereg,
|
||||
rn: basereg,
|
||||
rm: tmp.to_reg(),
|
||||
extendop: ExtendOp::UXTX,
|
||||
};
|
||||
const_insts.push(add_inst);
|
||||
(const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
|
||||
(const_insts, MemArg::reg(tmp.to_reg()))
|
||||
}
|
||||
}
|
||||
|
||||
&MemArg::Label(ref label) => {
|
||||
let off = memlabel_finalize(insn_off, label);
|
||||
(vec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
(smallvec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
}
|
||||
_ => (vec![], mem.clone()),
|
||||
|
||||
_ => (smallvec![], mem.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,12 +98,12 @@ pub fn u64_constant(bits: u64) -> ConstantData {
|
|||
// Instructions and subcomponents: emission
|
||||
|
||||
fn machreg_to_gpr(m: Reg) -> u32 {
|
||||
assert!(m.get_class() == RegClass::I64);
|
||||
assert_eq!(m.get_class(), RegClass::I64);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
fn machreg_to_vec(m: Reg) -> u32 {
|
||||
assert!(m.get_class() == RegClass::V128);
|
||||
assert_eq!(m.get_class(), RegClass::V128);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
|
@ -137,6 +162,14 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
|
|||
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
|
||||
}
|
||||
|
||||
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
|
||||
}
|
||||
}
|
||||
|
||||
const MOVE_WIDE_FIXED: u32 = 0x92800000;
|
||||
|
||||
#[repr(u32)]
|
||||
|
@ -275,8 +308,8 @@ fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) ->
|
|||
}
|
||||
|
||||
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert!(!is_16b); // to be supported later.
|
||||
0b00001110_101_00000_00011_1_00000_00000
|
||||
| ((is_16b as u32) << 30)
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
| (machreg_to_vec(rn) << 16)
|
||||
| (machreg_to_vec(rn) << 5)
|
||||
|
@ -322,8 +355,29 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
|||
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
||||
fn emit(&self, sink: &mut O, flags: &settings::Flags) {
|
||||
fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||
let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
|
||||
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
virtual_sp_offset: i64,
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = EmitState;
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
|
||||
// N.B.: we *must* not exceed the "worst-case size" used to compute
|
||||
// where to insert islands, except when islands are explicitly triggered
|
||||
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
|
||||
// to allow disabling the check for `JTSequence`, which is always
|
||||
// emitted following an `EmitIsland`.
|
||||
let mut start_off = sink.cur_offset();
|
||||
|
||||
match self {
|
||||
&Inst::AluRRR { alu_op, rd, rn, rm } => {
|
||||
let top11 = match alu_op {
|
||||
|
@ -596,10 +650,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
// ldst encoding helpers take Reg, not Writable<Reg>.
|
||||
|
@ -608,17 +662,17 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
// This is the base opcode (top 10 bits) for the "unscaled
|
||||
// immediate" form (Unscaled). Other addressing modes will OR in
|
||||
// other values for bits 24/25 (bits 1/2 of this constant).
|
||||
let op = match self {
|
||||
&Inst::ULoad8 { .. } => 0b0011100001,
|
||||
&Inst::SLoad8 { .. } => 0b0011100010,
|
||||
&Inst::ULoad16 { .. } => 0b0111100001,
|
||||
&Inst::SLoad16 { .. } => 0b0111100010,
|
||||
&Inst::ULoad32 { .. } => 0b1011100001,
|
||||
&Inst::SLoad32 { .. } => 0b1011100010,
|
||||
&Inst::ULoad64 { .. } => 0b1111100001,
|
||||
&Inst::FpuLoad32 { .. } => 0b1011110001,
|
||||
&Inst::FpuLoad64 { .. } => 0b1111110001,
|
||||
&Inst::FpuLoad128 { .. } => 0b0011110011,
|
||||
let (op, bits) = match self {
|
||||
&Inst::ULoad8 { .. } => (0b0011100001, 8),
|
||||
&Inst::SLoad8 { .. } => (0b0011100010, 8),
|
||||
&Inst::ULoad16 { .. } => (0b0111100001, 16),
|
||||
&Inst::SLoad16 { .. } => (0b0111100010, 16),
|
||||
&Inst::ULoad32 { .. } => (0b1011100001, 32),
|
||||
&Inst::SLoad32 { .. } => (0b1011100010, 32),
|
||||
&Inst::ULoad64 { .. } => (0b1111100001, 64),
|
||||
&Inst::FpuLoad32 { .. } => (0b1011110001, 32),
|
||||
&Inst::FpuLoad64 { .. } => (0b1111110001, 64),
|
||||
&Inst::FpuLoad128 { .. } => (0b0011110011, 128),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -632,6 +686,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -640,19 +697,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
));
|
||||
}
|
||||
&MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => {
|
||||
match (ty, self) {
|
||||
(I8, &Inst::ULoad8 { .. }) => {}
|
||||
(I8, &Inst::SLoad8 { .. }) => {}
|
||||
(I16, &Inst::ULoad16 { .. }) => {}
|
||||
(I16, &Inst::SLoad16 { .. }) => {}
|
||||
(I32, &Inst::ULoad32 { .. }) => {}
|
||||
(I32, &Inst::SLoad32 { .. }) => {}
|
||||
(I64, &Inst::ULoad64 { .. }) => {}
|
||||
(F32, &Inst::FpuLoad32 { .. }) => {}
|
||||
(F64, &Inst::FpuLoad64 { .. }) => {}
|
||||
(I128, &Inst::FpuLoad128 { .. }) => {}
|
||||
_ => panic!("Mismatching reg-scaling type in MemArg"),
|
||||
}
|
||||
assert_eq!(bits, ty_bits(ty));
|
||||
let extendop = match &mem {
|
||||
&MemArg::RegScaled(..) => None,
|
||||
&MemArg::RegScaledExtended(_, _, _, op) => Some(op),
|
||||
|
@ -697,9 +742,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -739,20 +785,20 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let op = match self {
|
||||
&Inst::Store8 { .. } => 0b0011100000,
|
||||
&Inst::Store16 { .. } => 0b0111100000,
|
||||
&Inst::Store32 { .. } => 0b1011100000,
|
||||
&Inst::Store64 { .. } => 0b1111100000,
|
||||
&Inst::FpuStore32 { .. } => 0b1011110000,
|
||||
&Inst::FpuStore64 { .. } => 0b1111110000,
|
||||
&Inst::FpuStore128 { .. } => 0b0011110010,
|
||||
let (op, bits) = match self {
|
||||
&Inst::Store8 { .. } => (0b0011100000, 8),
|
||||
&Inst::Store16 { .. } => (0b0111100000, 16),
|
||||
&Inst::Store32 { .. } => (0b1011100000, 32),
|
||||
&Inst::Store64 { .. } => (0b1111100000, 64),
|
||||
&Inst::FpuStore32 { .. } => (0b1011110000, 32),
|
||||
&Inst::FpuStore64 { .. } => (0b1111110000, 64),
|
||||
&Inst::FpuStore128 { .. } => (0b0011110010, 128),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -766,6 +812,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -794,9 +843,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -883,6 +933,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
&Inst::FpuMove64 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
|
||||
}
|
||||
&Inst::FpuMove128 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
|
||||
}
|
||||
&Inst::FpuRR { fpu_op, rd, rn } => {
|
||||
let top22 = match fpu_op {
|
||||
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
|
||||
|
@ -913,6 +966,44 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||
}
|
||||
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
|
||||
FPUOpRI::UShr32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::UShr64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
},
|
||||
&Inst::FpuRRRR {
|
||||
fpu_op,
|
||||
rd,
|
||||
|
@ -926,6 +1017,15 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let bits_12_16 = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(I8X16, ty);
|
||||
0b00101
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::FpuCmp32 { rn, rm } => {
|
||||
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
|
||||
}
|
||||
|
@ -980,11 +1080,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(8),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
sink.put4(const_data.to_bits());
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
|
@ -993,13 +1093,29 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
sink.put8(const_data.to_bits());
|
||||
}
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad128 {
|
||||
rd,
|
||||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(20),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
for i in const_data.to_le_bytes().iter() {
|
||||
sink.put1(*i);
|
||||
}
|
||||
}
|
||||
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
|
||||
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
|
||||
}
|
||||
|
@ -1033,12 +1149,40 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecRRR { rd, rn, rm, alu_op } => {
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
} => {
|
||||
let enc_size_for_cmp = match ty {
|
||||
I8X16 => 0b00,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
let (top11, bit15_10) = match alu_op {
|
||||
VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011),
|
||||
VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011),
|
||||
VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011),
|
||||
VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011),
|
||||
VecALUOp::SQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::SQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::UQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::UQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
|
||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
@ -1084,7 +1228,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
if top22 != 0 {
|
||||
sink.put4(enc_extend(top22, rd, rn));
|
||||
} else {
|
||||
Inst::mov32(rd, rn).emit(sink, flags);
|
||||
Inst::mov32(rd, rn).emit(sink, flags, state);
|
||||
}
|
||||
}
|
||||
&Inst::Extend {
|
||||
|
@ -1107,7 +1251,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
rn: zero_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
sub_inst.emit(sink, flags);
|
||||
sub_inst.emit(sink, flags, state);
|
||||
}
|
||||
&Inst::Extend {
|
||||
rd,
|
||||
|
@ -1127,10 +1271,14 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
panic!("Unsupported extend variant");
|
||||
}
|
||||
&Inst::Jump { ref dest } => {
|
||||
// TODO: differentiate between as_off26() returning `None` for
|
||||
// out-of-range vs. not-yet-finalized. The latter happens when we
|
||||
// do early (fake) emission for size computation.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
|
||||
let off = sink.cur_offset();
|
||||
// Indicate that the jump uses a label, if so, so that a fixup can occur later.
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(off, off + 4, l);
|
||||
}
|
||||
// Emit the jump itself.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
|
||||
}
|
||||
&Inst::Ret => {
|
||||
sink.put4(0xd65f03c0);
|
||||
|
@ -1138,71 +1286,47 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
&Inst::EpiloguePlaceholder => {
|
||||
// Noop; this is just a placeholder for epilogues.
|
||||
}
|
||||
&Inst::Call {
|
||||
ref dest,
|
||||
loc,
|
||||
opcode,
|
||||
..
|
||||
} => {
|
||||
sink.add_reloc(loc, Reloc::Arm64Call, dest, 0);
|
||||
&Inst::Call { ref info } => {
|
||||
sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
|
||||
sink.put4(enc_jump26(0b100101, 0));
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CallInd {
|
||||
rn, loc, opcode, ..
|
||||
} => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
&Inst::CallInd { ref info } => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"),
|
||||
&Inst::CondBrLowered { target, kind } => match kind {
|
||||
// TODO: handle >2^19 case by emitting a compound sequence with
|
||||
// an unconditional (26-bit) branch. We need branch-relaxation
|
||||
// adjustment machinery to enable this (because we don't want to
|
||||
// always emit the long form).
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
target.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
},
|
||||
&Inst::CondBrLoweredCompound {
|
||||
&Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
} => {
|
||||
// Conditional part first.
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
taken.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
let cond_off = sink.cur_offset();
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
|
||||
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
|
||||
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
|
||||
}
|
||||
// Unconditional part.
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0)));
|
||||
sink.put4(enc_conditional_br(taken, kind));
|
||||
|
||||
// Unconditional part next.
|
||||
let uncond_off = sink.cur_offset();
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
|
||||
}
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
|
||||
}
|
||||
&Inst::OneWayCondBr { target, kind } => {
|
||||
let off = sink.cur_offset();
|
||||
if let Some(l) = target.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch19);
|
||||
}
|
||||
sink.put4(enc_conditional_br(target, kind));
|
||||
}
|
||||
&Inst::IndirectBr { rn, .. } => {
|
||||
sink.put4(enc_br(rn));
|
||||
|
@ -1219,8 +1343,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.add_trap(srcloc, code);
|
||||
sink.put4(0xd4a00000);
|
||||
}
|
||||
&Inst::Adr { rd, ref label } => {
|
||||
let off = memlabel_finalize(sink.cur_offset_from_start(), label);
|
||||
&Inst::Adr { rd, off } => {
|
||||
assert!(off > -(1 << 20));
|
||||
assert!(off < (1 << 20));
|
||||
sink.put4(enc_adr(off, rd));
|
||||
|
@ -1235,26 +1358,20 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
ref targets,
|
||||
ref info,
|
||||
..
|
||||
} => {
|
||||
// This sequence is *one* instruction in the vcode, and is expanded only here at
|
||||
// emission time, because we cannot allow the regalloc to insert spills/reloads in
|
||||
// the middle; we depend on hardcoded PC-rel addressing below.
|
||||
//
|
||||
// N.B.: if PC-rel addressing on ADR below is changed, also update
|
||||
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
|
||||
|
||||
// Save index in a tmp (the live range of ridx only goes to start of this
|
||||
// sequence; rtmp1 or rtmp2 may overwrite it).
|
||||
let inst = Inst::gen_move(rtmp2, ridx, I64);
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
// Load address of jump table
|
||||
let inst = Inst::Adr {
|
||||
rd: rtmp1,
|
||||
label: MemLabel::PCRel(16),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Adr { rd: rtmp1, off: 16 };
|
||||
inst.emit(sink, flags, state);
|
||||
// Load value out of jump table
|
||||
let inst = Inst::SLoad32 {
|
||||
rd: rtmp2,
|
||||
|
@ -1266,7 +1383,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
// Add base of jump table to jump-table-sourced block offset
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op: ALUOp::Add64,
|
||||
|
@ -1274,22 +1391,30 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
rn: rtmp1.to_reg(),
|
||||
rm: rtmp2.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
// Branch to computed address. (`targets` here is only used for successor queries
|
||||
// and is not needed for emission.)
|
||||
let inst = Inst::IndirectBr {
|
||||
rn: rtmp1.to_reg(),
|
||||
targets: vec![],
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
// Emit jump table (table of 32-bit offsets).
|
||||
for target in targets {
|
||||
let off = target.as_offset_words() * 4;
|
||||
let off = i32::try_from(off).unwrap();
|
||||
// cast i32 to u32 (two's-complement)
|
||||
let off = off as u32;
|
||||
sink.put4(off);
|
||||
let jt_off = sink.cur_offset();
|
||||
for &target in info.targets.iter() {
|
||||
let word_off = sink.cur_offset();
|
||||
let off_into_table = word_off - jt_off;
|
||||
sink.use_label_at_offset(
|
||||
word_off,
|
||||
target.as_label().unwrap(),
|
||||
LabelUse::PCRel32,
|
||||
);
|
||||
sink.put4(off_into_table);
|
||||
}
|
||||
|
||||
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
|
||||
// disable the worst-case-size check in this case.
|
||||
start_off = sink.cur_offset();
|
||||
}
|
||||
&Inst::LoadConst64 { rd, const_data } => {
|
||||
let inst = Inst::ULoad64 {
|
||||
|
@ -1297,11 +1422,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
sink.put8(const_data);
|
||||
}
|
||||
&Inst::LoadExtName {
|
||||
|
@ -1315,11 +1440,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
inst.emit(sink, flags, state);
|
||||
sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
|
||||
if flags.emit_all_ones_funcaddrs() {
|
||||
sink.put8(u64::max_value());
|
||||
|
@ -1327,53 +1452,82 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
|||
sink.put8(0);
|
||||
}
|
||||
}
|
||||
&Inst::LoadAddr { rd, ref mem } => match *mem {
|
||||
MemArg::FPOffset(fp_off) => {
|
||||
let alu_op = if fp_off < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
|
||||
{
|
||||
let inst = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
imm12,
|
||||
rn: fp_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
} else {
|
||||
let const_insts =
|
||||
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
|
||||
for inst in const_insts {
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: fp_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
&Inst::LoadAddr { rd, ref mem } => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
_ => unimplemented!("{:?}", mem),
|
||||
},
|
||||
&Inst::GetPinnedReg { rd } => {
|
||||
let inst = Inst::Mov {
|
||||
rd,
|
||||
rm: xreg(PINNED_REG),
|
||||
|
||||
let (reg, offset) = match mem {
|
||||
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
|
||||
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
|
||||
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
let abs_offset = if offset < 0 {
|
||||
-offset as u64
|
||||
} else {
|
||||
offset as u64
|
||||
};
|
||||
let alu_op = if offset < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
|
||||
if offset == 0 {
|
||||
let mov = Inst::mov(rd, reg);
|
||||
mov.emit(sink, flags, state);
|
||||
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
|
||||
let add = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
imm12,
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
} else {
|
||||
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
|
||||
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
|
||||
// that no other instructions will be inserted here (we're emitting directly),
|
||||
// and a live range of `tmp2` should not span this instruction, so this use
|
||||
// should otherwise be correct.
|
||||
debug_assert!(rd.to_reg() != tmp2_reg());
|
||||
debug_assert!(reg != tmp2_reg());
|
||||
let tmp = writable_tmp2_reg();
|
||||
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
|
||||
insn.emit(sink, flags, state);
|
||||
}
|
||||
let add = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
rm: tmp.to_reg(),
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
}
|
||||
}
|
||||
&Inst::SetPinnedReg { rm } => {
|
||||
let inst = Inst::Mov {
|
||||
rd: Writable::from_reg(xreg(PINNED_REG)),
|
||||
rm,
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
&Inst::VirtualSPOffsetAdj { offset } => {
|
||||
debug!(
|
||||
"virtual sp offset adjusted by {} -> {}",
|
||||
offset,
|
||||
state.virtual_sp_offset + offset
|
||||
);
|
||||
state.virtual_sp_offset += offset;
|
||||
}
|
||||
&Inst::EmitIsland { needed_space } => {
|
||||
if sink.island_needed(needed_space + 4) {
|
||||
let jump_around_label = sink.get_label();
|
||||
let jmp = Inst::Jump {
|
||||
dest: BranchTarget::Label(jump_around_label),
|
||||
};
|
||||
jmp.emit(sink, flags, state);
|
||||
sink.emit_island();
|
||||
sink.bind_label(jump_around_label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let end_off = sink.cur_offset();
|
||||
debug_assert!((end_off - start_off) <= Inst::worst_case_size());
|
||||
}
|
||||
}
|
||||
@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
|
|||
use crate::isa::test_utils;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
#[test]
|
||||
|
@ -1310,38 +1311,68 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(32768),
|
||||
mem: MemArg::FPOffset(32768, I8),
|
||||
srcloc: None,
|
||||
},
|
||||
"0F0090D2EF011D8BE10140F9",
|
||||
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
"100090D2B063308B010240F9",
|
||||
"movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(-32768),
|
||||
mem: MemArg::FPOffset(-32768, I8),
|
||||
srcloc: None,
|
||||
},
|
||||
"EFFF8F92EF011D8BE10140F9",
|
||||
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
"F0FF8F92B063308B010240F9",
|
||||
"movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576), // 2^20
|
||||
mem: MemArg::FPOffset(1048576, I8), // 2^20
|
||||
srcloc: None,
|
||||
},
|
||||
"0F02A0D2EF011D8BE10140F9",
|
||||
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
"1002A0D2B063308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
|
||||
mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1
|
||||
srcloc: None,
|
||||
},
|
||||
"2F0080D20F02A0F2EF011D8BE10140F9",
|
||||
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
"300080D21002A0F2B063308B010240F9",
|
||||
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 8, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E18040F8",
|
||||
"ldur x1, [x7, #8]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1024, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E10042F9",
|
||||
"ldr x1, [x7, #1024]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1048576, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"1002A0D2F060308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
|
@ -1801,6 +1832,7 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
|
@ -1811,6 +1843,7 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
|
@ -1821,6 +1854,7 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
|
@ -1831,10 +1865,83 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmeq,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E38E386E",
|
||||
"cmeq v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmgt,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E336384E",
|
||||
"cmgt v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmge,
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I8X16,
|
||||
},
|
||||
"373D2C4E",
|
||||
"cmge v23.16b, v9.16b, v12.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhi,
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2534216E",
|
||||
"cmhi v5.16b, v1.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhs,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I8X16,
|
||||
},
|
||||
"483C2F6E",
|
||||
"cmhs v8.16b, v2.16b, v15.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::Extend {
|
||||
rd: writable_xreg(1),
|
||||
|
@ -1955,7 +2062,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Zero(xreg(8)),
|
||||
},
|
||||
|
@ -1963,7 +2070,7 @@ fn test_aarch64_binemit() {
|
|||
"cbz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::NotZero(xreg(8)),
|
||||
},
|
||||
|
@ -1971,7 +2078,7 @@ fn test_aarch64_binemit() {
|
|||
"cbnz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Eq),
|
||||
},
|
||||
|
@ -1979,7 +2086,7 @@ fn test_aarch64_binemit() {
|
|||
"b.eq 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ne),
|
||||
},
|
||||
|
@ -1988,7 +2095,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hs),
|
||||
},
|
||||
|
@ -1996,7 +2103,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lo),
|
||||
},
|
||||
|
@ -2004,7 +2111,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lo 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Mi),
|
||||
},
|
||||
|
@ -2012,7 +2119,7 @@ fn test_aarch64_binemit() {
|
|||
"b.mi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Pl),
|
||||
},
|
||||
|
@ -2020,7 +2127,7 @@ fn test_aarch64_binemit() {
|
|||
"b.pl 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vs),
|
||||
},
|
||||
|
@ -2028,7 +2135,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
},
|
||||
|
@ -2036,7 +2143,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vc 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hi),
|
||||
},
|
||||
|
@ -2044,7 +2151,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ls),
|
||||
},
|
||||
|
@ -2052,7 +2159,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ls 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ge),
|
||||
},
|
||||
|
@ -2060,7 +2167,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ge 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lt),
|
||||
},
|
||||
|
@ -2068,7 +2175,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Gt),
|
||||
},
|
||||
|
@ -2076,7 +2183,7 @@ fn test_aarch64_binemit() {
|
|||
"b.gt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
},
|
||||
|
@ -2084,7 +2191,7 @@ fn test_aarch64_binemit() {
|
|||
"b.le 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Al),
|
||||
},
|
||||
|
@ -2092,7 +2199,7 @@ fn test_aarch64_binemit() {
|
|||
"b.al 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::CondBrLowered {
|
||||
Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Nv),
|
||||
},
|
||||
|
@ -2101,7 +2208,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::CondBrLoweredCompound {
|
||||
Inst::CondBr {
|
||||
taken: BranchTarget::ResolvedOffset(64),
|
||||
not_taken: BranchTarget::ResolvedOffset(128),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
|
@ -2112,11 +2219,13 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::Call {
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
info: Box::new(CallInfo {
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
}),
|
||||
},
|
||||
"00000094",
|
||||
"bl 0",
|
||||
|
@ -2124,11 +2233,13 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::CallInd {
|
||||
rn: xreg(10),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
info: Box::new(CallIndInfo {
|
||||
rn: xreg(10),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
}),
|
||||
},
|
||||
"40013FD6",
|
||||
"blr x10",
|
||||
|
@ -2137,7 +2248,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::IndirectBr {
|
||||
rn: xreg(3),
|
||||
targets: vec![1, 2, 3],
|
||||
targets: vec![],
|
||||
},
|
||||
"60001FD6",
|
||||
"br x3",
|
||||
|
@ -2148,7 +2259,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::Adr {
|
||||
rd: writable_xreg(15),
|
||||
label: MemLabel::PCRel((1 << 20) - 4),
|
||||
off: (1 << 20) - 4,
|
||||
},
|
||||
"EFFF7F10",
|
||||
"adr x15, pc+1048572",
|
||||
|
@ -2163,6 +2274,15 @@ fn test_aarch64_binemit() {
|
|||
"mov v8.8b, v4.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuMove128 {
|
||||
rd: writable_vreg(17),
|
||||
rn: vreg(26),
|
||||
},
|
||||
"511FBA4E",
|
||||
"mov v17.16b, v26.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRR {
|
||||
fpu_op: FPUOp1::Abs32,
|
||||
|
@ -2399,6 +2519,46 @@ fn test_aarch64_binemit() {
|
|||
"fmadd d15, d30, d31, d1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204202F",
|
||||
"ushr v2.2s, v5.2s, #32",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204417F",
|
||||
"ushr d2, d5, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44553F2F",
|
||||
"sli v4.2s, v10.2s, #31",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44557F7F",
|
||||
"sli d4, d10, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuToInt {
|
||||
op: FpuToIntOp::F32ToU32,
|
||||
|
@ -2685,6 +2845,15 @@ fn test_aarch64_binemit() {
|
|||
"ldr d16, pc+8 ; b 12 ; data.f64 1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst128 {
|
||||
rd: writable_vreg(5),
|
||||
const_data: 0x0f0e0d0c0b0a09080706050403020100,
|
||||
},
|
||||
"4500009C05000014000102030405060708090A0B0C0D0E0F",
|
||||
"ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuCSel32 {
|
||||
rd: writable_vreg(1),
|
||||
|
@ -2791,19 +2960,11 @@ fn test_aarch64_binemit() {
|
|||
let actual_printing = insn.show_rru(Some(&rru));
|
||||
assert_eq!(expected_printing, actual_printing);
|
||||
|
||||
// Check the encoding is as expected.
|
||||
let text_size = {
|
||||
let mut code_sec = MachSectionSize::new(0);
|
||||
insn.emit(&mut code_sec, &flags);
|
||||
code_sec.size()
|
||||
};
|
||||
|
||||
let mut sink = test_utils::TestCodeSink::new();
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, text_size);
|
||||
let code_sec = sections.get_section(code_idx);
|
||||
insn.emit(code_sec, &flags);
|
||||
sections.emit(&mut sink);
|
||||
let mut buffer = MachBuffer::new();
|
||||
insn.emit(&mut buffer, &flags, &mut Default::default());
|
||||
let buffer = buffer.finish();
|
||||
buffer.emit(&mut sink);
|
||||
let actual_encoding = &sink.stringify();
|
||||
assert_eq!(expected_encoding, actual_encoding);
|
||||
}
|
||||
|
|
|
@ -106,6 +106,85 @@ impl SImm7Scaled {
}
}

#[derive(Clone, Copy, Debug)]
pub struct FPULeftShiftImm {
pub amount: u8,
pub lane_size_in_bits: u8,
}

impl FPULeftShiftImm {
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
if amount < lane_size_in_bits {
Some(Self {
amount,
lane_size_in_bits,
})
} else {
None
}
}

pub fn enc(&self) -> u32 {
debug_assert!(self.lane_size_in_bits.is_power_of_two());
debug_assert!(self.lane_size_in_bits > self.amount);
// The encoding of the immediate follows the table below,
// where xs encode the shift amount.
//
// | lane_size_in_bits | encoding |
// +------------------------------+
// | 8                 | 0001xxx  |
// | 16                | 001xxxx  |
// | 32                | 01xxxxx  |
// | 64                | 1xxxxxx  |
//
// The highest one bit is represented by `lane_size_in_bits`. Since
// `lane_size_in_bits` is a power of 2 and `amount` is less
// than `lane_size_in_bits`, they can be ORed
// together to produce the encoded value.
u32::from(self.lane_size_in_bits | self.amount)
}
}

#[derive(Clone, Copy, Debug)]
pub struct FPURightShiftImm {
pub amount: u8,
pub lane_size_in_bits: u8,
}

impl FPURightShiftImm {
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
if amount > 0 && amount <= lane_size_in_bits {
Some(Self {
amount,
lane_size_in_bits,
})
} else {
None
}
}

pub fn enc(&self) -> u32 {
debug_assert_ne!(0, self.amount);
// The encoding of the immediate follows the table below,
// where xs encodes the negated shift amount.
//
// | lane_size_in_bits | encoding |
// +------------------------------+
// | 8                 | 0001xxx  |
// | 16                | 001xxxx  |
// | 32                | 01xxxxx  |
// | 64                | 1xxxxxx  |
//
// The shift amount is negated such that a shift amount
// of 1 (in 64-bit) is encoded as 0b111111 and a shift
// amount of 64 is encoded as 0b000000,
// in the bottom 6 bits.
u32::from((self.lane_size_in_bits * 2) - self.amount)
}
}

/// A 9-bit signed offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm9 {

@ -134,6 +213,11 @@ impl SImm9 {
pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff
}

/// Signed value of immediate.
pub fn value(&self) -> i32 {
self.value as i32
}
}

/// An unsigned, scaled 12-bit offset.

@ -172,6 +256,16 @@ impl UImm12Scaled {
pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
}

/// Value after scaling.
pub fn value(&self) -> u32 {
self.value as u32
}

/// The value type which is the scaling base.
pub fn scale_ty(&self) -> Type {
self.scale_ty
}
}

/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted

@ -566,6 +660,18 @@ impl ShowWithRRU for SImm7Scaled {
}
}

impl ShowWithRRU for FPULeftShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}

impl ShowWithRRU for FPURightShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}

impl ShowWithRRU for SImm9 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)

(Diff for one file not shown because of its large size.)

@ -1,5 +1,6 @@
//! AArch64 ISA definitions: registers.

use crate::ir::types::*;
use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;
use crate::settings;

@ -20,23 +21,21 @@ pub const PINNED_REG: u8 = 21;
const XREG_INDICES: [u8; 31] = [
// X0 - X7
32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14
40, 41, 42, 43, 44, 45, 46,
// X15
59,
// X8 - X15
40, 41, 42, 43, 44, 45, 46, 47,
// X16, X17
47, 48,
58, 59,
// X18
60,
// X19, X20
49, 50,
48, 49,
// X21, put aside because it's the pinned register.
58,
57,
// X22 - X28
51, 52, 53, 54, 55, 56, 57,
// X29
50, 51, 52, 53, 54, 55, 56,
// X29 (FP)
61,
// X30
// X30 (LR)
62,
];

@ -125,14 +124,17 @@ pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}

/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
///
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
/// to live through call instructions.
pub fn spilltmp_reg() -> Reg {
xreg(15)
xreg(16)
}

/// Get a writable reference to the spilltmp reg.

@ -140,6 +142,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}

/// Get a reference to the second temp register. We need this in some edge cases
/// where we need both the spilltmp and another temporary.
///
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
/// free to use otherwise.
pub fn tmp2_reg() -> Reg {
xreg(17)
}

/// Get a writable reference to the tmp2 reg.
pub fn writable_tmp2_reg() -> Writable<Reg> {
Writable::from_reg(tmp2_reg())
}

/// Create the register universe for AArch64.
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
let mut regs = vec![];

@ -173,7 +189,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {

for i in 0u8..32u8 {
// See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
continue;
}
let reg = Reg::new_real(

@ -191,7 +207,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: x_reg_base as usize,
last: x_reg_last as usize,
suggested_scratch: Some(XREG_INDICES[13] as usize),
suggested_scratch: Some(XREG_INDICES[19] as usize),
});
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: v_reg_base as usize,

@ -211,7 +227,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
regs.len()
};

regs.push((xreg(15).to_real_reg(), "x15".to_string()));
regs.push((xreg(16).to_real_reg(), "x16".to_string()));
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string()));

@ -259,13 +276,17 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
s
}

/// Show a vector register when its use as a 32-bit or 64-bit float is known.
/// Show a vector register.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
if reg.get_class() != RegClass::V128 {
|
||||
return s;
|
||||
}
|
||||
let prefix = if size.is32() { "s" } else { "d" };
|
||||
let prefix = match size {
|
||||
InstSize::Size32 => "s",
|
||||
InstSize::Size64 => "d",
|
||||
InstSize::Size128 => "q",
|
||||
};
|
||||
s.replace_range(0..1, prefix);
|
||||
s
|
||||
}
|
||||
|
@ -291,3 +312,17 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
|
|||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
match ty {
|
||||
I8X16 => s.push_str(".16b"),
|
||||
F32X2 => s.push_str(".2s"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
s
|
||||
}
|
||||
|
|
|
@ -14,12 +14,14 @@ use crate::ir::Inst as IRInst;
|
|||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
|
||||
use super::lower_inst;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
//============================================================================
|
||||
|
@ -104,18 +106,11 @@ pub(crate) enum ResultRegImmShift {
|
|||
}
|
||||
|
||||
//============================================================================
|
||||
// Instruction input and output "slots".
|
||||
// Instruction input "slots".
|
||||
//
|
||||
// We use these types to refer to operand numbers, and result numbers, together
|
||||
// with the associated instruction, in a type-safe way.
|
||||
|
||||
/// Identifier for a particular output of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnOutput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) output: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnInput {
|
||||
|
@ -123,95 +118,55 @@ pub(crate) struct InsnInput {
|
|||
pub(crate) input: usize,
|
||||
}
|
||||
|
||||
/// Producer of a value: either a previous instruction's output, or a register that will be
|
||||
/// codegen'd separately.
|
||||
/// Identifier for a particular output of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum InsnInputSource {
|
||||
Output(InsnOutput),
|
||||
Reg(Reg),
|
||||
}
|
||||
|
||||
impl InsnInputSource {
|
||||
fn as_output(self) -> Option<InsnOutput> {
|
||||
match self {
|
||||
InsnInputSource::Output(o) => Some(o),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
|
||||
assert!(num <= ctx.num_inputs(output.insn));
|
||||
InsnInput {
|
||||
insn: output.insn,
|
||||
input: num,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
|
||||
/// register otherwise.
|
||||
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
|
||||
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
|
||||
let out = InsnOutput {
|
||||
insn: input_inst,
|
||||
output: result_num,
|
||||
};
|
||||
InsnInputSource::Output(out)
|
||||
} else {
|
||||
let reg = ctx.input(input.insn, input.input);
|
||||
InsnInputSource::Reg(reg)
|
||||
}
|
||||
pub(crate) struct InsnOutput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) output: usize,
|
||||
}
|
||||
|
||||
//============================================================================
|
||||
// Lowering: convert instruction outputs to result types.
|
||||
// Lowering: convert instruction inputs to forms that we can use.
|
||||
|
||||
/// Lower an instruction output to a 64-bit constant, if possible.
|
||||
pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
|
||||
/// Lower an instruction input to a 64-bit constant, if possible.
|
||||
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
|
||||
let input = ctx.get_input(input.insn, input.input);
|
||||
input.constant
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a constant register-shift amount, if possible.
|
||||
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<u128> {
|
||||
if out.output > 0 {
|
||||
None
|
||||
} else {
|
||||
let inst_data = ctx.data(out.insn);
|
||||
if inst_data.opcode() == Opcode::Null {
|
||||
Some(0)
|
||||
} else {
|
||||
match inst_data {
|
||||
&InstructionData::UnaryImm { opcode: _, imm } => {
|
||||
// Only has Into for i64; we use u64 elsewhere, so we cast.
|
||||
let imm: i64 = imm.into();
|
||||
Some(imm as u64)
|
||||
}
|
||||
&InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
|
||||
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
|
||||
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
|
||||
_ => None,
|
||||
|
||||
match inst_data {
&InstructionData::UnaryConst {
opcode: _,
constant_handle,
} => {
let mut bytes = [0u8; 16];
let c = ctx.get_constant_data(constant_handle).clone().into_vec();
assert_eq!(c.len(), 16);
bytes.copy_from_slice(&c);
Some(u128::from_le_bytes(bytes))
}
_ => None,
}
}
}
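The `UnaryConst` arm above reassembles the 128-bit value from the 16-byte constant-pool entry in little-endian order. A self-contained sketch of just that conversion step (editor's illustration; the `LowerCtx`/constant-handle plumbing is omitted):

fn const_data_to_u128(data: &[u8]) -> u128 {
    assert_eq!(data.len(), 16);
    let mut bytes = [0u8; 16];
    bytes.copy_from_slice(data);
    u128::from_le_bytes(bytes)
}

fn main() {
    let mut data = vec![0u8; 16];
    data[0] = 0x01;  // least significant byte
    data[15] = 0x80; // most significant byte
    assert_eq!(const_data_to_u128(&data), (0x80u128 << 120) | 0x01);
}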
|
||||
|
||||
pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f32> {
|
||||
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f64> {
|
||||
output_to_const(ctx, out).map(|value| f64::from_bits(value))
|
||||
}
|
||||
|
||||
/// Lower an instruction output to a constant register-shift amount, if possible.
|
||||
pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
|
||||
/// parameter to `input_to_*` below.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
|
@ -237,9 +192,9 @@ impl NarrowValueMode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Lower an instruction output to a reg.
|
||||
/// Allocate a register for an instruction output and return it.
|
||||
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
|
||||
ctx.output(out.insn, out.output)
|
||||
ctx.get_output(out.insn, out.output)
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a reg.
|
||||
|
@ -252,13 +207,31 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> Reg {
|
||||
debug!("input_to_reg: input {:?}", input);
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let from_bits = ty_bits(ty) as u8;
|
||||
let in_reg = ctx.input(input.insn, input.input);
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
let in_reg = if let Some(c) = inputs.constant {
|
||||
let masked = if from_bits < 64 {
|
||||
c & ((1u64 << from_bits) - 1)
|
||||
} else {
|
||||
c
|
||||
};
|
||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
|
||||
for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
to_reg.to_reg()
|
||||
} else {
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
};
|
||||
|
||||
match (narrow_mode, from_bits) {
|
||||
(NarrowValueMode::None, _) => in_reg,
|
||||
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -269,7 +242,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(NarrowValueMode::SignExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -282,18 +255,23 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
||||
|
||||
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
if inputs.constant.is_some() {
|
||||
// Constants are zero-extended to full 64-bit width on load already.
|
||||
in_reg
|
||||
} else {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
}
|
||||
}
|
||||
(NarrowValueMode::SignExtend64, n) if n < 64 => {
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -304,6 +282,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(_, 64) => in_reg,
|
||||
(_, 128) => in_reg,
|
||||
|
||||
_ => panic!(
|
||||
"Unsupported input width: input ty {} bits {} mode {:?}",
|
||||
|
@ -313,8 +292,6 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
||||
/// This does not actually codegen the source instruction; it just uses the
|
||||
/// vreg into which the source instruction will generate its value.
|
||||
///
|
||||
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
||||
/// the high bits clear. For many operations, such as an add/sub/mul or any
|
||||
|
@ -330,23 +307,18 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRS {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
let op = ctx.data(insn).opcode();
|
||||
|
||||
if op == Opcode::Ishl {
|
||||
let shiftee = get_input(ctx, out, 0);
|
||||
let shift_amt = get_input(ctx, out, 1);
|
||||
let shiftee = InsnInput { insn, input: 0 };
|
||||
let shift_amt = InsnInput { insn, input: 1 };
|
||||
|
||||
// Can we get the shift amount as an immediate?
|
||||
if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
|
||||
if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
ctx.merged(insn);
|
||||
ctx.merged(shift_amt_out.insn);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
}
|
||||
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -364,11 +336,10 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSE {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
let op = ctx.data(insn).opcode();
|
||||
let out_ty = ctx.output_ty(insn, out.output);
|
||||
let out_ty = ctx.output_ty(insn, 0);
|
||||
let out_bits = ty_bits(out_ty);
|
||||
|
||||
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
|
||||
|
@ -378,7 +349,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
&& ((narrow_mode.is_32bit() && out_bits < 32)
|
||||
|| (!narrow_mode.is_32bit() && out_bits < 64))
|
||||
{
|
||||
let reg = output_to_reg(ctx, out);
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let extendop = match (narrow_mode, out_bits) {
|
||||
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
|
||||
ExtendOp::SXTB
|
||||
|
@ -402,15 +373,14 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return ResultRSE::RegExtend(reg.to_reg(), extendop);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
|
||||
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
|
||||
if op == Opcode::Uextend || op == Opcode::Sextend {
|
||||
assert!(out_bits == 32 || out_bits == 64);
|
||||
let sign_extend = op == Opcode::Sextend;
|
||||
let extendee = get_input(ctx, out, 0);
|
||||
let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
|
||||
let inner_ty = ctx.input_ty(insn, 0);
|
||||
let inner_bits = ty_bits(inner_ty);
|
||||
assert!(inner_bits < out_bits);
|
||||
let extendop = match (sign_extend, inner_bits) {
|
||||
|
@ -424,8 +394,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(false, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
|
||||
ctx.merged(insn);
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
}
|
||||
|
@ -438,12 +407,9 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSEImm12 {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
}
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -455,14 +421,11 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSImmLogic {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
let ty = ctx.output_ty(out.insn, out.output);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
}
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -473,12 +436,9 @@ pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
|
|||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> ResultRegImmShift {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
}
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -584,12 +544,10 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
|
||||
// mul instructions (Load/StoreComplex don't include scale factors).
|
||||
|
||||
// Handle one reg and offset that fits in immediate, if possible.
|
||||
// Handle one reg and offset.
|
||||
if addends.len() == 1 {
|
||||
let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
|
||||
if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
|
||||
return memarg;
|
||||
}
|
||||
return MemArg::RegOffset(reg, offset as i64, elem_ty);
|
||||
}
|
||||
|
||||
// Handle two regs and a zero offset, if possible.
|
||||
|
@ -600,7 +558,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
// Otherwise, generate add instructions.
|
||||
let addr = ctx.tmp(RegClass::I64, I64);
|
||||
let addr = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
|
||||
// Get the const into a reg.
|
||||
lower_constant_u64(ctx, addr.clone(), offset as u64);
|
||||
|
@ -612,7 +570,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// In an addition, the stack register is the zero register, so divert it to another
|
||||
// register just before doing the actual add.
|
||||
let reg = if reg == stack_reg() {
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::Mov {
|
||||
rd: tmp,
|
||||
rm: stack_reg(),
|
||||
|
@ -659,6 +617,14 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::load_fp_constant64(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
rd: Writable<Reg>,
|
||||
value: u128,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant128(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
|
||||
match cc {
|
||||
IntCC::Equal => Cond::Eq,
|
||||
|
@ -750,6 +716,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
B64 | I64 | F64 => 64,
|
||||
B128 | I128 => 128,
|
||||
IFLAGS | FFLAGS => 32,
|
||||
I8X16 | B8X16 => 128,
|
||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||
}
|
||||
}
|
||||
|
@ -757,7 +724,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||
match ty {
|
||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
||||
F32 | F64 | B128 | I128 => false,
|
||||
F32 | F64 | B128 | I128 | I8X16 => false,
|
||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||
}
|
||||
|
@ -823,24 +790,29 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
|
||||
/// Checks for an instance of `op` feeding the given input.
|
||||
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
|
||||
c: &mut C,
|
||||
input: InsnInput,
|
||||
op: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
debug!(
|
||||
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
|
||||
input, inputs, op
|
||||
);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
debug!(" -> input inst {:?}", data);
|
||||
if data.opcode() == op {
|
||||
c.merged(out.insn);
|
||||
return Some(out.insn);
|
||||
return Some(src_inst);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
||||
/// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
|
||||
/// Bint or a bitcast).
|
||||
///
|
||||
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
|
||||
/// a bit more generic.
|
||||
|
@ -850,21 +822,19 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
|
|||
op: Opcode,
|
||||
conv: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
if let Some(ret) = maybe_input_insn(c, input, op) {
|
||||
return Some(ret);
|
||||
}
|
||||
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
if data.opcode() == conv {
|
||||
let conv_insn = out.insn;
|
||||
let conv_input = InsnInput {
|
||||
insn: conv_insn,
|
||||
input: 0,
|
||||
};
|
||||
if let Some(inner) = maybe_input_insn(c, conv_input, op) {
|
||||
c.merged(conv_insn);
|
||||
return Some(inner);
|
||||
let inputs = c.get_input(src_inst, 0);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -876,6 +846,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
insn: IRInst,
|
||||
is_signed: bool,
|
||||
) {
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let bits = ty_bits(ty);
|
||||
let narrow_mode = match (bits <= 32, is_signed) {
|
||||
|
@ -897,6 +868,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
let ty = ctx.input_ty(insn, 0);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rd = writable_zero_reg();
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
|
@ -934,17 +906,21 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
|
|||
impl LowerBackend for AArch64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst);
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst)
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[BlockIndex],
|
||||
fallthrough: Option<BlockIndex>,
|
||||
) {
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
|
||||
}
|
||||
|
||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||
Some(xreg(PINNED_REG))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,17 +1,20 @@
|
|||
//! Lower a single Cranelift instruction into vcode.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::condcodes::FloatCC;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
||||
use crate::isa::aarch64::abi::*;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
|
||||
use regalloc::RegClass;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::SmallVec;
|
||||
|
@ -19,7 +22,10 @@ use smallvec::SmallVec;
|
|||
use super::lower::*;
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
insn: IRInst,
|
||||
) -> CodegenResult<()> {
|
||||
let op = ctx.data(insn).opcode();
|
||||
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
|
||||
.map(|i| InsnInput { insn, input: i })
|
||||
|
@ -35,17 +41,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
|
||||
let value = output_to_const(ctx, outputs[0]).unwrap();
|
||||
let value = ctx.get_constant(insn).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_u64(ctx, rd, value);
|
||||
}
|
||||
Opcode::F32const => {
|
||||
let value = output_to_const_f32(ctx, outputs[0]).unwrap();
|
||||
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f32(ctx, rd, value);
|
||||
}
|
||||
Opcode::F64const => {
|
||||
let value = output_to_const_f64(ctx, outputs[0]).unwrap();
|
||||
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f64(ctx, rd, value);
|
||||
}
|
||||
|
@ -79,8 +85,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
} else {
|
||||
VecALUOp::UQAddScalar
|
||||
};
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -91,6 +97,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -110,8 +117,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
} else {
|
||||
VecALUOp::UQSubScalar
|
||||
};
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -122,6 +129,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -271,7 +279,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -297,7 +305,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 20;
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
@ -324,7 +332,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
nzcv: NZCV::new(false, false, false, false),
|
||||
cond: Cond::Eq,
|
||||
});
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(12),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
});
|
||||
|
@ -337,7 +345,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -493,7 +501,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
let tmp = ctx.tmp(RegClass::I64, ty);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp,
|
||||
|
@ -516,7 +524,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
// Really ty_bits_size - rn, but the upper bits of the result are
|
||||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp,
|
||||
|
@ -529,7 +537,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
};
|
||||
|
||||
// Explicitly mask the rotation count.
|
||||
let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op: ALUOp::And32,
|
||||
rd: tmp_masked_rm,
|
||||
|
@ -538,8 +546,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
});
|
||||
let tmp_masked_rm = tmp_masked_rm.to_reg();
|
||||
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp1,
|
||||
|
@ -578,7 +586,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
}
|
||||
immshift.imm &= ty_bits_size - 1;
|
||||
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmShift {
|
||||
alu_op: ALUOp::Lsr32,
|
||||
rd: tmp1,
|
||||
|
@ -683,7 +691,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
// and fix the sequence below to work properly for this.
|
||||
let narrow_mode = NarrowValueMode::ZeroExtend64;
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
|
||||
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
|
||||
// the rest of the code is identical to the 64-bit version.
|
||||
|
@ -870,6 +878,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
||||
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
||||
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
||||
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in load"),
|
||||
});
|
||||
}
|
||||
|
@ -909,6 +918,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
(32, true) => Inst::FpuStore32 { rd, mem, srcloc },
|
||||
(64, false) => Inst::Store64 { rd, mem, srcloc },
|
||||
(64, true) => Inst::FpuStore64 { rd, mem, srcloc },
|
||||
(128, _) => Inst::FpuStore128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in store"),
|
||||
});
|
||||
}
|
||||
|
@ -992,7 +1002,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
}
|
||||
|
||||
Opcode::Bitselect => {
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
|
@ -1145,12 +1155,66 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
(false, true) => NarrowValueMode::SignExtend64,
|
||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||
};
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
|
||||
if ty_bits(ty) < 128 {
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
} else {
|
||||
if ty != I8X16 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"unsupported simd type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
|
||||
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
// 'Not equal' is implemented with 'equal' and inverting
|
||||
// the result.
|
||||
let (alu_op, swap) = match cond {
|
||||
Cond::Eq => (VecALUOp::Cmeq, false),
|
||||
Cond::Ne => (VecALUOp::Cmeq, false),
|
||||
Cond::Ge => (VecALUOp::Cmge, false),
|
||||
Cond::Gt => (VecALUOp::Cmgt, false),
|
||||
Cond::Le => (VecALUOp::Cmge, true),
|
||||
Cond::Lt => (VecALUOp::Cmgt, true),
|
||||
Cond::Hs => (VecALUOp::Cmhs, false),
|
||||
Cond::Hi => (VecALUOp::Cmhi, false),
|
||||
Cond::Ls => (VecALUOp::Cmhs, true),
|
||||
Cond::Lo => (VecALUOp::Cmhi, true),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if swap {
|
||||
std::mem::swap(&mut rn, &mut rm);
|
||||
}
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
ty: I8X16,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
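The operand-swap and invert tricks used in this arm rest on identities that hold per lane: a < b is the same as b > a, a <= b is b >= a, and a != b is !(a == b). A trivial scalar sketch of those identities (editor's illustration, not part of the patch):

fn main() {
    let (a, b) = (3u8, 7u8);
    assert_eq!(a < b, b > a);       // Lt lowered as swapped Cmgt
    assert_eq!(a <= b, b >= a);     // Le lowered as swapped Cmge
    assert_eq!(a != b, !(a == b));  // Ne lowered as Cmeq plus Not
}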
|
||||
|
||||
Opcode::Fcmp => {
|
||||
|
@ -1188,7 +1252,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
Opcode::Trapif | Opcode::Trapff => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
|
||||
let cond = if op == Opcode::Trapif {
|
||||
let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
// The flags must not have been clobbered by any other
|
||||
// instruction between the iadd_ifcout and this instruction, as
|
||||
// verified by the CLIF validator; so we can simply use the
|
||||
// flags here.
|
||||
cond
|
||||
} else if op == Opcode::Trapif {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
|
@ -1211,7 +1283,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
// Branch around the break instruction with inverted cond. Go straight to lowered
|
||||
// one-target form; this is logically part of a single-in single-out template lowering.
|
||||
let cond = cond.invert();
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
|
@ -1233,11 +1305,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
Opcode::FuncAddr => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let extname = ctx.call_target(insn).unwrap().clone();
|
||||
let (extname, _) = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: extname,
|
||||
name: Box::new(extname),
|
||||
srcloc: loc,
|
||||
offset: 0,
|
||||
});
|
||||
|
@ -1249,12 +1322,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
Opcode::SymbolValue => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: extname,
|
||||
name: Box::new(extname),
|
||||
srcloc: loc,
|
||||
offset,
|
||||
});
|
||||
|
@ -1262,54 +1335,50 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
|
||||
Opcode::Call | Opcode::CallIndirect => {
|
||||
let loc = ctx.srcloc(insn);
|
||||
let (abi, inputs) = match op {
|
||||
let (mut abi, inputs) = match op {
|
||||
Opcode::Call => {
|
||||
let extname = ctx.call_target(insn).unwrap();
|
||||
let (extname, dist) = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(AArch64ABICall::from_func(sig, &extname, loc), &inputs[..])
|
||||
(
|
||||
AArch64ABICall::from_func(sig, &extname, dist, loc)?,
|
||||
&inputs[..],
|
||||
)
|
||||
}
|
||||
Opcode::CallIndirect => {
|
||||
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() - 1 == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
for inst in abi.gen_stack_pre_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
abi.emit_stack_pre_adjust(ctx);
|
||||
assert!(inputs.len() == abi.num_args());
|
||||
for (i, input) in inputs.iter().enumerate() {
|
||||
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
|
||||
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
for inst in abi.gen_call().into_iter() {
|
||||
ctx.emit(inst);
|
||||
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
|
||||
}
|
||||
abi.emit_call(ctx);
|
||||
for (i, output) in outputs.iter().enumerate() {
|
||||
let retval_reg = output_to_reg(ctx, *output);
|
||||
ctx.emit(abi.gen_copy_retval_to_reg(i, retval_reg));
|
||||
}
|
||||
for inst in abi.gen_stack_post_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
|
||||
}
|
||||
abi.emit_stack_post_adjust(ctx);
|
||||
}
|
||||
|
||||
Opcode::GetPinnedReg => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::GetPinnedReg { rd });
|
||||
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
|
||||
}
|
||||
|
||||
Opcode::SetPinnedReg => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
ctx.emit(Inst::SetPinnedReg { rm });
|
||||
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
|
||||
}
|
||||
|
||||
Opcode::Spill
|
||||
|
@ -1340,8 +1409,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
panic!("Branch opcode reached non-branch lowering logic!");
|
||||
}
|
||||
|
||||
Opcode::Vconst
|
||||
| Opcode::Shuffle
|
||||
Opcode::Vconst => {
|
||||
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f128(ctx, rd, value);
|
||||
}
|
||||
|
||||
Opcode::RawBitcast => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||
}
|
||||
|
||||
Opcode::Shuffle
|
||||
| Opcode::Vsplit
|
||||
| Opcode::Vconcat
|
||||
| Opcode::Vselect
|
||||
|
@ -1350,15 +1431,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
| Opcode::Splat
|
||||
| Opcode::Insertlane
|
||||
| Opcode::Extractlane
|
||||
| Opcode::RawBitcast
|
||||
| Opcode::ScalarToVector
|
||||
| Opcode::Swizzle
|
||||
| Opcode::Uload8x8
|
||||
| Opcode::Uload8x8Complex
|
||||
| Opcode::Sload8x8
|
||||
| Opcode::Sload8x8Complex
|
||||
| Opcode::Uload16x4
|
||||
| Opcode::Uload16x4Complex
|
||||
| Opcode::Sload16x4
|
||||
| Opcode::Sload16x4Complex
|
||||
| Opcode::Uload32x2
|
||||
| Opcode::Sload32x2 => {
|
||||
| Opcode::Uload32x2Complex
|
||||
| Opcode::Sload32x2
|
||||
| Opcode::Sload32x2Complex => {
|
||||
// TODO
|
||||
panic!("Vector ops not implemented.");
|
||||
}
|
||||
|
@ -1452,54 +1538,38 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
Opcode::Fcopysign => {
|
||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
||||
//
|
||||
// (64 bits for example, 32-bit sequence is analogous):
|
||||
// This is a scalar Fcopysign.
|
||||
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
||||
//
|
||||
// MOV Xtmp1, Dinput0
|
||||
// MOV Xtmp2, Dinput1
|
||||
// AND Xtmp2, 0x8000_0000_0000_0000
|
||||
// BIC Xtmp1, 0x8000_0000_0000_0000
|
||||
// ORR Xtmp1, Xtmp1, Xtmp2
|
||||
// MOV Doutput, Xtmp1
|
||||
// mov vd, vn
|
||||
// ushr vtmp, vm, #63 / #31
|
||||
// sli vd, vtmp, #63 / #31
|
||||
|
||||
let ty = ctx.output_ty(insn, 0);
|
||||
let bits = ty_bits(ty);
|
||||
let bits = ty_bits(ty) as u8;
|
||||
assert!(bits == 32 || bits == 64);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
|
||||
let imml = if bits == 32 {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000, I32).unwrap()
|
||||
} else {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000_0000_0000, I64).unwrap()
|
||||
};
|
||||
let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp2,
|
||||
rn: tmp2.to_reg(),
|
||||
imml: imml.clone(),
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
|
||||
|
||||
// Copy LHS to rd.
|
||||
ctx.emit(Inst::FpuMove64 { rd, rn });
|
||||
|
||||
// Copy the sign bit to the lowest bit in tmp.
|
||||
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
});
|
||||
let alu_op = choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
imml,
|
||||
});
|
||||
let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
rm: tmp2.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::MovToVec64 {
|
||||
|
||||
// Insert the bit from tmp into the sign bit of rd.
|
||||
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
||||
rd,
|
||||
rn: tmp1.to_reg(),
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
}
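The ushr/sli pair described at the top of this arm is the usual copysign bit trick: shift the sign bit of the second operand down to bit 0, then insert it back into the sign position of the result. A scalar sketch of the equivalent computation on the raw f64 bits (editor's illustration, not the emitted code; `copysign_f64` is a hypothetical helper):

fn copysign_f64(lhs: f64, rhs: f64) -> f64 {
    let sign = rhs.to_bits() >> 63; // like `ushr vtmp, vm, #63`
    // Replacing bit 63 of lhs with `sign` gives the same result as `sli vd, vtmp, #63`.
    let merged = (lhs.to_bits() & !(1u64 << 63)) | (sign << 63);
    f64::from_bits(merged)
}

fn main() {
    assert_eq!(copysign_f64(1.5, -2.0), -1.5);
    assert_eq!(copysign_f64(-1.5, 2.0), 1.5);
}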
|
||||
|
||||
|
@ -1531,14 +1601,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
} else {
|
||||
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
|
||||
}
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
|
||||
let tmp = ctx.tmp(RegClass::V128, I128);
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
|
||||
// Check that the input is in range, with "truncate towards zero" semantics. This means
|
||||
// we allow values that are slightly out of range:
|
||||
|
@ -1572,7 +1642,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1585,7 +1655,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1615,7 +1685,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1628,7 +1698,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1704,8 +1774,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
|
@ -1790,6 +1860,35 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
});
|
||||
}
|
||||
|
||||
Opcode::IaddIfcout => {
|
||||
// This is a two-output instruction that is needed for the
|
||||
// legalizer's explicit heap-check sequence, among possible other
|
||||
// uses. Its second output is a flags output only ever meant to
|
||||
// check for overflow using the
|
||||
// `backend.unsigned_add_overflow_condition()` condition.
|
||||
//
|
||||
// Note that the CLIF validation will ensure that no flag-setting
|
||||
// operation comes between this IaddIfcout and its use (e.g., a
|
||||
// Trapif). Thus, we can rely on implicit communication through the
|
||||
// processor flags rather than explicitly generating flags into a
|
||||
// register. We simply use the variant of the add instruction that
|
||||
// sets flags (`adds`) here.
|
||||
|
||||
// Ensure that the second output isn't directly called for: it
|
||||
// should only be used by a flags-consuming op, which will directly
|
||||
// understand this instruction and merge the comparison.
|
||||
assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg()));
|
||||
|
||||
// Now handle the iadd as above, except use an AddS opcode that sets
|
||||
// flags.
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
|
@ -1800,7 +1899,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
|
@ -1849,6 +1947,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Packss
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
|
@ -1860,14 +1960,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||
Opcode::AvgRound => unimplemented!(),
|
||||
Opcode::TlsValue => unimplemented!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[BlockIndex],
|
||||
fallthrough: Option<BlockIndex>,
|
||||
) {
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
// A block should end with at most two branches. The first may be a
|
||||
// conditional branch; a conditional branch can be followed only by an
|
||||
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
|
@ -1881,18 +1983,14 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
//println!(
|
||||
// "lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
// op0, op1
|
||||
//);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Block(targets[0]);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Block(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let flag_input = InsnInput {
|
||||
|
@ -1952,6 +2050,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::BrIcmp => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let ty = ctx.input_ty(branches[0], 0);
|
||||
let bits = ty_bits(ty);
|
||||
|
@ -1984,13 +2084,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
kind,
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::Brif => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
|
@ -2001,7 +2103,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
kind,
|
||||
});
|
||||
} else {
|
||||
// If the ifcmp result is actually placed in a
|
||||
|
@ -2011,7 +2113,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2019,6 +2121,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Brff => {
|
||||
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_fp_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
|
@ -2028,7 +2131,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
kind,
|
||||
});
|
||||
} else {
|
||||
// If the ffcmp result is actually placed in a
|
||||
|
@ -2038,7 +2141,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2055,12 +2158,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// fills in `targets[0]` with our fallthrough block, so this
|
||||
// is valid for both Jump and Fallthrough.
|
||||
ctx.emit(Inst::Jump {
|
||||
dest: BranchTarget::Block(targets[0]),
|
||||
dest: BranchTarget::Label(targets[0]),
|
||||
});
|
||||
}
|
||||
Opcode::BrTable => {
|
||||
// Expand `br_table index, default, JT` to:
|
||||
//
|
||||
// emit_island // this forces an island at this point
|
||||
// // if the jumptable would push us past
|
||||
// // the deadline
|
||||
// subs idx, #jt_size
|
||||
// b.hs default
|
||||
// adr vTmp1, PC+16
|
||||
|
@ -2070,6 +2176,11 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// [jumptable offsets relative to JT base]
|
||||
let jt_size = targets.len() - 1;
|
||||
assert!(jt_size <= std::u32::MAX as usize);
|
||||
|
||||
ctx.emit(Inst::EmitIsland {
|
||||
needed_space: 4 * (6 + jt_size) as CodeOffset,
|
||||
});
|
||||
|
||||
let ridx = input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
|
@ -2079,8 +2190,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
NarrowValueMode::ZeroExtend32,
|
||||
);
|
||||
|
||||
let rtmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
|
||||
// Bounds-check and branch to default.
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
||||
|
@ -2099,10 +2210,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
}
|
||||
let default_target = BranchTarget::Block(targets[0]);
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: default_target.clone(),
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
});
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
|
@ -2123,19 +2234,23 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let jt_targets: Vec<BranchTarget> = targets
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|bix| BranchTarget::Block(*bix))
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.collect();
|
||||
let targets_for_term: Vec<BlockIndex> = targets.to_vec();
|
||||
let targets_for_term: Vec<MachLabel> = targets.to_vec();
|
||||
ctx.emit(Inst::JTSequence {
|
||||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
targets: jt_targets,
|
||||
targets_for_term,
|
||||
info: Box::new(JTSequenceInfo {
|
||||
targets: jt_targets,
|
||||
targets_for_term: targets_for_term,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
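To make the two-branch invariant above concrete, here is a small standalone sketch (not part of the patch; the enum and label types are simplified stand-ins for the real Cranelift types) of how the trailing unconditional opcode picks the not-taken target while targets[0] always supplies the taken target:

#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum Opcode { Brz, Jump, Fallthrough }

#[derive(Clone, Copy, Debug, PartialEq)]
enum BranchTarget { Label(u32) }

// Mirrors the selection logic in `lower_branch`: the conditional branch takes
// `targets[0]`; the unconditional follower decides whether the not-taken edge
// is `targets[1]` or the fallthrough block.
fn pick_targets(op1: Opcode, targets: &[u32], fallthrough: Option<u32>) -> (BranchTarget, BranchTarget) {
    let taken = BranchTarget::Label(targets[0]);
    let not_taken = match op1 {
        Opcode::Jump => BranchTarget::Label(targets[1]),
        Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
        _ => unreachable!(),
    };
    (taken, not_taken)
}

fn main() {
    assert_eq!(pick_targets(Opcode::Jump, &[3, 7], None),
               (BranchTarget::Label(3), BranchTarget::Label(7)));
    assert_eq!(pick_targets(Opcode::Fallthrough, &[3], Some(9)),
               (BranchTarget::Label(3), BranchTarget::Label(9)));
}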
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{
|
||||
|
@ -15,7 +16,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, Triple};
|
|||
|
||||
// New backend:
|
||||
mod abi;
|
||||
mod inst;
|
||||
pub(crate) mod inst;
|
||||
mod lower;
|
||||
mod lower_inst;
|
||||
|
||||
|
@ -25,12 +26,18 @@ use inst::create_reg_universe;
|
|||
pub struct AArch64Backend {
|
||||
triple: Triple,
|
||||
flags: settings::Flags,
|
||||
reg_universe: RealRegUniverse,
|
||||
}
|
||||
|
||||
impl AArch64Backend {
|
||||
/// Create a new AArch64 backend with the given (shared) flags.
|
||||
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
|
||||
AArch64Backend { triple, flags }
|
||||
let reg_universe = create_reg_universe(&flags);
|
||||
AArch64Backend {
|
||||
triple,
|
||||
flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
|
||||
/// This performs lowering to VCode, register-allocates the code, computes block layout and
|
||||
|
@ -40,7 +47,7 @@ impl AArch64Backend {
|
|||
func: &Function,
|
||||
flags: settings::Flags,
|
||||
) -> CodegenResult<VCode<inst::Inst>> {
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags));
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
|
||||
compile::compile::<AArch64Backend>(func, self, abi)
|
||||
}
|
||||
}
|
||||
|
@ -53,7 +60,7 @@ impl MachBackend for AArch64Backend {
|
|||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let sections = vcode.emit();
|
||||
let buffer = vcode.emit();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
|
@ -62,8 +69,10 @@ impl MachBackend for AArch64Backend {
|
|||
None
|
||||
};
|
||||
|
||||
let buffer = buffer.finish();
|
||||
|
||||
Ok(MachCompileResult {
|
||||
sections,
|
||||
buffer,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
|
@ -81,8 +90,21 @@ impl MachBackend for AArch64Backend {
|
|||
&self.flags
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> RealRegUniverse {
|
||||
create_reg_universe(&self.flags)
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on
|
||||
// underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the
|
||||
// opposite of x86).
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
}
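A quick standalone check (not part of the patch) of the arithmetic behind those two conditions: an unsigned add carries out exactly when the wrapped sum is unsigned-less-than an operand, and an unsigned subtract borrows exactly when the minuend is smaller than the subtrahend, which is what carry-set (`hs`) and carry-clear (`lo`) test on aarch64:

fn main() {
    let (a, b) = (0xffff_fff0u32, 0x20u32);

    // Add: carry-out <=> the wrapped sum is less than either operand.
    let (sum, carried) = a.overflowing_add(b);
    assert!(carried && sum < a);

    // Sub: borrow (carry clear on aarch64) <=> a < b.
    let (small, big) = (5u32, 9u32);
    let (_, borrowed) = small.overflowing_sub(big);
    assert!(borrowed && small < big);
}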
|
||||
|
||||
|
@ -134,8 +156,8 @@ mod test {
|
|||
Triple::from_str("aarch64").unwrap(),
|
||||
settings::Flags::new(shared_flags),
|
||||
);
|
||||
let sections = backend.compile_function(&mut func, false).unwrap().sections;
|
||||
let code = §ions.sections[0].data;
|
||||
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
|
||||
let code = &buffer.data[..];
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
|
@ -149,7 +171,7 @@ mod test {
|
|||
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden);
|
||||
assert_eq!(code, &golden[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -192,34 +214,32 @@ mod test {
|
|||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = &result.sections.sections[0].data;
|
||||
let code = &result.buffer.data[..];
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
// mov x1, x0
|
||||
// mov x0, #0x1234
|
||||
// add w1, w1, w0
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// sub w0, w1, w0
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w0, w0, w1
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x28
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w1, w0, w1
|
||||
// mov w1, w1
|
||||
// cbnz x1, 0x18
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x18
|
||||
// mov x1, #0x1234 // #4660
|
||||
// sub w0, w0, w1
|
||||
// mov sp, x29
|
||||
// ldp x29, x30, [sp], #16
|
||||
// ret
|
||||
// add w2, w1, w0
|
||||
// mov w2, w2
|
||||
// cbnz x2, ... <---- compound branch (cond / uncond)
|
||||
// b ... <----
|
||||
|
||||
let golden = vec![
|
||||
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
|
||||
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
|
||||
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
|
||||
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
|
||||
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
|
||||
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
|
||||
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
|
||||
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
|
||||
193, 168, 192, 3, 95, 214,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden);
|
||||
assert_eq!(code, &golden[..]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{Architecture, Triple};
|
||||
|
||||
|
@ -135,6 +136,10 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -66,6 +66,7 @@ use crate::settings::SetResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
|
||||
|
@ -77,11 +78,14 @@ mod riscv;
|
|||
#[cfg(feature = "x86")]
|
||||
mod x86;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
mod x64;
|
||||
|
||||
#[cfg(feature = "arm32")]
|
||||
mod arm32;
|
||||
|
||||
#[cfg(feature = "arm64")]
|
||||
mod aarch64;
|
||||
pub(crate) mod aarch64;
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
pub mod unwind;
|
||||
|
@ -419,6 +423,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
|||
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait
|
||||
/// with `isa.as_any().downcast_ref::<isa::foo::Isa>()`.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
impl Debug for &dyn TargetIsa {
|
||||
|
|
|
@ -17,6 +17,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -130,6 +131,10 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -163,7 +168,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -176,7 +181,7 @@ mod tests {
|
|||
);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -186,7 +191,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV64.
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
@ -214,7 +219,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -224,7 +229,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64, types::I64).is_err());
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -234,7 +239,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV32.
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
|
|
@ -8,7 +8,6 @@ use thiserror::Error;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
type Register = u16;
|
||||
type Expression = Vec<u8>;
|
||||
|
||||
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
|
||||
#[allow(missing_docs)]
|
||||
|
@ -23,6 +22,8 @@ pub enum RegisterMappingError {
|
|||
}
|
||||
|
||||
// This mirrors gimli's CallFrameInstruction, but is serializable
|
||||
// This excludes CfaExpression, Expression, ValExpression due to
|
||||
// https://github.com/gimli-rs/gimli/issues/513.
|
||||
// TODO: if gimli ever adds serialization support, remove this type
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
|
@ -30,15 +31,12 @@ pub(crate) enum CallFrameInstruction {
|
|||
Cfa(Register, i32),
|
||||
CfaRegister(Register),
|
||||
CfaOffset(i32),
|
||||
CfaExpression(Expression),
|
||||
Restore(Register),
|
||||
Undefined(Register),
|
||||
SameValue(Register),
|
||||
Offset(Register, i32),
|
||||
ValOffset(Register, i32),
|
||||
Register(Register, Register),
|
||||
Expression(Register, Expression),
|
||||
ValExpression(Register, Expression),
|
||||
RememberState,
|
||||
RestoreState,
|
||||
ArgsSize(u32),
|
||||
|
@ -52,34 +50,33 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
|
||||
CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
|
||||
CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
|
||||
CallFrameInstruction::CfaExpression(expr) => Self::CfaExpression(expr.0),
|
||||
CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
|
||||
CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
|
||||
CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
|
||||
CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
|
||||
CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
|
||||
CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
|
||||
CallFrameInstruction::Expression(reg, expr) => Self::Expression(reg.0, expr.0),
|
||||
CallFrameInstruction::ValExpression(reg, expr) => Self::ValExpression(reg.0, expr.0),
|
||||
CallFrameInstruction::RememberState => Self::RememberState,
|
||||
CallFrameInstruction::RestoreState => Self::RestoreState,
|
||||
CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
|
||||
_ => {
|
||||
// Cranelift's unwind support does not generate `CallFrameInstruction`s with
|
||||
// Expression at this moment, and it is not trivial to
|
||||
// serialize such instructions.
|
||||
panic!("CallFrameInstruction with Expression not supported");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
||||
fn into(self) -> gimli::write::CallFrameInstruction {
|
||||
use gimli::{
|
||||
write::{CallFrameInstruction, Expression},
|
||||
Register,
|
||||
};
|
||||
use gimli::{write::CallFrameInstruction, Register};
|
||||
|
||||
match self {
|
||||
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
|
||||
Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
|
||||
Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
|
||||
Self::CfaExpression(expr) => CallFrameInstruction::CfaExpression(Expression(expr)),
|
||||
Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
|
||||
Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
|
||||
Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
|
||||
|
@ -88,12 +85,6 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
Self::Register(reg1, reg2) => {
|
||||
CallFrameInstruction::Register(Register(reg1), Register(reg2))
|
||||
}
|
||||
Self::Expression(reg, expr) => {
|
||||
CallFrameInstruction::Expression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::ValExpression(reg, expr) => {
|
||||
CallFrameInstruction::ValExpression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::RememberState => CallFrameInstruction::RememberState,
|
||||
Self::RestoreState => CallFrameInstruction::RestoreState,
|
||||
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
|
||||
|
|
|
@ -0,0 +1,467 @@
|
|||
//! Implementation of the standard x64 ABI.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
use args::*;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum ABIArg {
|
||||
Reg(RealReg),
|
||||
_Stack,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum ABIRet {
|
||||
Reg(RealReg),
|
||||
_Stack,
|
||||
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
args: Vec<ABIArg>,
|
||||
rets: Vec<ABIRet>,
|
||||
|
||||
/// Offsets to each stack slot.
|
||||
_stack_slots: Vec<usize>,
|
||||
|
||||
/// Total stack size of all the stack slots.
|
||||
stack_slots_size: usize,
|
||||
|
||||
/// Clobbered registers, as indicated by regalloc.
|
||||
clobbered: Set<Writable<RealReg>>,
|
||||
|
||||
/// Total number of spill slots, as indicated by regalloc.
|
||||
num_spill_slots: Option<usize>,
|
||||
|
||||
/// Calculated while creating the prologue, and used when creating the epilogue. Amount by
|
||||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
}
|
||||
|
||||
fn in_int_reg(ty: types::Type) -> bool {
|
||||
match ty {
|
||||
types::I8
|
||||
| types::I16
|
||||
| types::I32
|
||||
| types::I64
|
||||
| types::B1
|
||||
| types::B8
|
||||
| types::B16
|
||||
| types::B32
|
||||
| types::B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
2 => Some(regs::rdx()),
|
||||
3 => Some(regs::rcx()),
|
||||
4 => Some(regs::r8()),
|
||||
5 => Some(regs::r9()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_callee_save_systemv(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||
_ => false,
|
||||
},
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl X64ABIBody {
|
||||
/// Create a new body ABI instance.
|
||||
pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
|
||||
// Compute args and retvals from signature.
|
||||
let mut args = vec![];
|
||||
let mut next_int_arg = 0;
|
||||
for param in &f.signature.params {
|
||||
match param.purpose {
|
||||
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
|
||||
// `VMContext` is `r14` in Baldrdash.
|
||||
args.push(ABIArg::Reg(regs::r14().to_real_reg()));
|
||||
}
|
||||
|
||||
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
|
||||
if in_int_reg(param.value_type) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
|
||||
args.push(ABIArg::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing arg on the stack");
|
||||
}
|
||||
next_int_arg += 1;
|
||||
} else {
|
||||
unimplemented!("non int normal register")
|
||||
}
|
||||
}
|
||||
|
||||
_ => unimplemented!("other parameter purposes"),
|
||||
}
|
||||
}
|
||||
|
||||
let mut rets = vec![];
|
||||
let mut next_int_retval = 0;
|
||||
for ret in &f.signature.returns {
|
||||
match ret.purpose {
|
||||
ir::ArgumentPurpose::Normal => {
|
||||
if in_int_reg(ret.value_type) {
|
||||
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
|
||||
rets.push(ABIRet::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing return on the stack");
|
||||
}
|
||||
next_int_retval += 1;
|
||||
} else {
|
||||
unimplemented!("returning non integer normal value");
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
unimplemented!("non normal argument purpose");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute stackslot locations and total stackslot size.
|
||||
let mut stack_offset: usize = 0;
|
||||
let mut _stack_slots = vec![];
|
||||
for (stackslot, data) in f.stack_slots.iter() {
|
||||
let off = stack_offset;
|
||||
stack_offset += data.size as usize;
|
||||
|
||||
// Round up to the next 8-byte boundary.
|
||||
stack_offset = (stack_offset + 7) & !7usize;
|
||||
|
||||
debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
|
||||
_stack_slots.push(off);
|
||||
}
|
||||
|
||||
Self {
|
||||
args,
|
||||
rets,
|
||||
_stack_slots,
|
||||
stack_slots_size: stack_offset,
|
||||
clobbered: Set::empty(),
|
||||
num_spill_slots: None,
|
||||
frame_size_bytes: None,
|
||||
call_conv: f.signature.call_conv.clone(),
|
||||
flags,
|
||||
}
|
||||
}
|
||||
}
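The stack-slot layout above rounds the running offset up to 8 bytes after each slot; a small worked example of that arithmetic (standalone sketch, not part of the patch, with made-up slot sizes):

fn main() {
    // Mirrors the loop in X64ABIBody::new: each slot starts at the current
    // offset, then the offset is bumped by the slot size and rounded up to
    // the next multiple of 8.
    let sizes = [12usize, 4, 8];
    let mut stack_offset = 0usize;
    let mut offsets = Vec::new();
    for &size in &sizes {
        offsets.push(stack_offset);
        stack_offset += size;
        stack_offset = (stack_offset + 7) & !7usize;
    }
    assert_eq!(offsets, vec![0, 16, 24]);
    assert_eq!(stack_offset, 32); // becomes stack_slots_size
}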
|
||||
|
||||
impl ABIBody for X64ABIBody {
|
||||
type I = Inst;
|
||||
|
||||
fn temp_needed(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn init(&mut self, _: Option<Writable<Reg>>) {}
|
||||
|
||||
fn flags(&self) -> &settings::Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn num_args(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_retvals(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_stackslots(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for arg in &self.args {
|
||||
if let &ABIArg::Reg(r) = arg {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn liveouts(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for ret in &self.rets {
|
||||
if let &ABIRet::Reg(r) = ret {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
|
||||
match &self.args[idx] {
|
||||
ABIArg::Reg(from_reg) => {
|
||||
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
|
||||
// TODO do we need a sign extension if it's I32?
|
||||
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
|
||||
}
|
||||
unimplemented!("moving from non-int arg to vreg");
|
||||
}
|
||||
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
idx: usize,
|
||||
from_reg: Writable<Reg>,
|
||||
ext: ArgumentExtension,
|
||||
) -> Vec<Inst> {
|
||||
match ext {
|
||||
ArgumentExtension::None => {}
|
||||
_ => unimplemented!(
|
||||
"unimplemented argument extension {:?} is required for baldrdash",
|
||||
ext
|
||||
),
|
||||
};
|
||||
|
||||
let mut ret = Vec::new();
|
||||
match &self.rets[idx] {
|
||||
ABIRet::Reg(to_reg) => {
|
||||
if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
|
||||
ret.push(Inst::mov_r_r(
|
||||
/*is64=*/ true,
|
||||
from_reg.to_reg(),
|
||||
Writable::<Reg>::from_reg(to_reg.to_reg()),
|
||||
))
|
||||
} else {
|
||||
unimplemented!("moving from vreg to non-int return value");
|
||||
}
|
||||
}
|
||||
|
||||
ABIRet::_Stack => {
|
||||
unimplemented!("moving from vreg to stack return value");
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_ret(&self) -> Inst {
|
||||
Inst::ret()
|
||||
}
|
||||
|
||||
fn gen_epilogue_placeholder(&self) -> Inst {
|
||||
Inst::epilogue_placeholder()
|
||||
}
|
||||
|
||||
fn set_num_spillslots(&mut self, slots: usize) {
|
||||
self.num_spill_slots = Some(slots);
|
||||
}
|
||||
|
||||
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
|
||||
self.clobbered = clobbered;
|
||||
}
|
||||
|
||||
fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn load_stackslot(
|
||||
&self,
|
||||
_slot: StackSlot,
|
||||
_offset: u32,
|
||||
_ty: Type,
|
||||
_into_reg: Writable<Reg>,
|
||||
) -> Inst {
|
||||
unimplemented!("load_stackslot")
|
||||
}
|
||||
|
||||
fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!("load_spillslot")
|
||||
}
|
||||
|
||||
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_spillslot")
|
||||
}
|
||||
|
||||
fn gen_prologue(&mut self) -> Vec<Inst> {
|
||||
let r_rsp = regs::rsp();
|
||||
|
||||
let mut insts = vec![];
|
||||
|
||||
// Baldrdash generates its own prologue sequence, so we don't have to.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// The "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::push64(RMI::reg(r_rbp)));
|
||||
// RSP is now 0 % 16
|
||||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let mut callee_saved_used = 0;
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
RegClass::I64 => {
|
||||
insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
|
||||
callee_saved_used += 8;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
|
||||
if self.call_conv.extends_baldrdash() {
|
||||
// Baldrdash expects the stack to take at least the number of words set in
|
||||
// baldrdash_prologue_words; count them here.
|
||||
debug_assert!(
|
||||
!self.flags.enable_probestack(),
|
||||
"baldrdash does not expect cranelift to emit stack probes"
|
||||
);
|
||||
total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
|
||||
}
|
||||
|
||||
debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
|
||||
let frame_size = total_stacksize + callee_saved_used % 16;
|
||||
|
||||
// Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
|
||||
let frame_size = (frame_size + 15) & !15;
|
||||
if frame_size > 0x7FFF_FFFF {
|
||||
unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
|
||||
}
|
||||
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
// Explicitly allocate the frame.
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
if frame_size > 0 {
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Sub,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Stash this value. We'll need it for the epilogue.
|
||||
debug_assert!(self.frame_size_bytes.is_none());
|
||||
self.frame_size_bytes = Some(frame_size);
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_epilogue(&self) -> Vec<Inst> {
|
||||
let mut insts = vec![];
|
||||
|
||||
// Undo what we did in the prologue.
|
||||
|
||||
// Clear the spill area and the 16-alignment padding below it.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let frame_size = self.frame_size_bytes.unwrap();
|
||||
if frame_size > 0 {
|
||||
let r_rsp = regs::rsp();
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Add,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for w_real_reg in clobbered.into_iter().rev() {
|
||||
match w_real_reg.to_reg().get_class() {
|
||||
RegClass::I64 => {
|
||||
// TODO: make these conversion sequences less cumbersome.
|
||||
insts.push(Inst::pop64(Writable::<Reg>::from_reg(
|
||||
w_real_reg.to_reg().to_reg(),
|
||||
)))
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
// Baldrdash generates its own epilogue sequence, so we don't have to.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// Undo the "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::pop64(w_rbp));
|
||||
insts.push(Inst::ret());
|
||||
}
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn frame_size(&self) -> u32 {
|
||||
self.frame_size_bytes
|
||||
.expect("frame size not computed before prologue generation") as u32
|
||||
}
|
||||
|
||||
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
(RegClass::I64, _) => 1,
|
||||
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
|
||||
(RegClass::V128, _) => 2,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
}
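In the prologue above, the frame size ends up 16-byte aligned so that RSP stays at 0 mod 16 inside the function body; a worked example of that computation (standalone sketch, not part of the patch; the input numbers are hypothetical):

fn main() {
    // Values as gen_prologue would see them (hypothetical).
    let stack_slots_size = 24usize;   // from X64ABIBody::new
    let num_spill_slots = 3usize;     // from the register allocator
    let callee_saved_used = 8usize;   // one callee-saved push (8 bytes)

    let total_stacksize = stack_slots_size + 8 * num_spill_slots; // 48
    // Pushing an odd number of callee-saved registers leaves RSP at 8 mod 16,
    // so 8 extra bytes are folded in before rounding.
    let frame_size = total_stacksize + callee_saved_used % 16;    // 56
    let frame_size = (frame_size + 15) & !15;                     // 64
    assert_eq!(frame_size, 64);
}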
|
|
@ -0,0 +1,420 @@
|
|||
//! Instruction operand sub-components (aka "parts"): definitions and printing.
|
||||
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
|
||||
|
||||
use crate::machinst::*;
|
||||
|
||||
use super::regs::show_ireg_sized;
|
||||
|
||||
/// A Memory Address. These denote a 64-bit value only.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Addr {
|
||||
/// sign-extend-32-to-64(Immediate) + Register
|
||||
IR { simm32: u32, base: Reg },
|
||||
|
||||
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
|
||||
IRRS {
|
||||
simm32: u32,
|
||||
base: Reg,
|
||||
index: Reg,
|
||||
shift: u8, /* 0 .. 3 only */
|
||||
},
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
Self::IR { simm32, base }
|
||||
}
|
||||
|
||||
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
debug_assert!(index.get_class() == RegClass::I64);
|
||||
debug_assert!(shift <= 3);
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
Addr::IR { simm32: _, base } => {
|
||||
collector.add_use(*base);
|
||||
}
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
base,
|
||||
index,
|
||||
shift: _,
|
||||
} => {
|
||||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Addr {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
} => format!(
|
||||
"{}({},{},{})",
|
||||
*simm32 as i32,
|
||||
base.show_rru(mb_rru),
|
||||
index.show_rru(mb_rru),
|
||||
1 << shift
|
||||
),
|
||||
}
|
||||
}
|
||||
}
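As a reminder of what the two addressing forms denote (a standalone sketch, not part of the patch): the effective address is base + simm32 for IR, and base + (index << shift) + simm32 for IRRS, with the 32-bit displacement sign-extended to 64 bits:

fn effective_addr_ir(simm32: u32, base: u64) -> u64 {
    base.wrapping_add(simm32 as i32 as i64 as u64)
}

fn effective_addr_irrs(simm32: u32, base: u64, index: u64, shift: u8) -> u64 {
    assert!(shift <= 3);
    base.wrapping_add(index << shift)
        .wrapping_add(simm32 as i32 as i64 as u64)
}

fn main() {
    // -16(%rbp) style: a negative displacement off a base register.
    assert_eq!(effective_addr_ir(-16i32 as u32, 0x1000), 0x0ff0);
    // 8(%rax,%rcx,4) style: base + index*4 + 8.
    assert_eq!(effective_addr_irrs(8, 0x2000, 3, 2), 0x2014);
}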
|
||||
|
||||
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
|
||||
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
|
||||
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
|
||||
/// `simm32` is its sign-extension out to 64 bits.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RMI {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
I { simm32: u32 },
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
// Constructors
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> RMI {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RMI::R { reg }
|
||||
}
|
||||
pub(crate) fn mem(addr: Addr) -> RMI {
|
||||
RMI::M { addr }
|
||||
}
|
||||
pub(crate) fn imm(simm32: u32) -> RMI {
|
||||
RMI::I { simm32 }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RMI::R { reg } => collector.add_use(*reg),
|
||||
RMI::M { addr } => addr.get_regs_as_uses(collector),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RMI {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RMI::M { addr } => addr.show_rru(mb_rru),
|
||||
RMI::I { simm32 } => format!("${}", *simm32 as i32),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32 or 64 bit value.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RM {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
}
|
||||
|
||||
impl RM {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> Self {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RM::R { reg }
|
||||
}
|
||||
|
||||
pub(crate) fn mem(addr: Addr) -> Self {
|
||||
RM::M { addr }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RM::R { reg } => collector.add_use(*reg),
|
||||
RM::M { addr } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RM {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RM::M { addr } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum RMI_R_Op {
|
||||
Add,
|
||||
Sub,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
||||
Mul,
|
||||
}
|
||||
|
||||
impl RMI_R_Op {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
RMI_R_Op::Add => "add".to_string(),
|
||||
RMI_R_Op::Sub => "sub".to_string(),
|
||||
RMI_R_Op::And => "and".to_string(),
|
||||
RMI_R_Op::Or => "or".to_string(),
|
||||
RMI_R_Op::Xor => "xor".to_string(),
|
||||
RMI_R_Op::Mul => "imul".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RMI_R_Op {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate ways of extending (widening) a value, using the Intel naming:
|
||||
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum ExtMode {
|
||||
/// Byte -> Longword.
|
||||
BL,
|
||||
/// Byte -> Quadword.
|
||||
BQ,
|
||||
/// Word -> Longword.
|
||||
WL,
|
||||
/// Word -> Quadword.
|
||||
WQ,
|
||||
/// Longword -> Quadword.
|
||||
LQ,
|
||||
}
|
||||
|
||||
impl ExtMode {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ExtMode::BL => "bl".to_string(),
|
||||
ExtMode::BQ => "bq".to_string(),
|
||||
ExtMode::WL => "wl".to_string(),
|
||||
ExtMode::WQ => "wq".to_string(),
|
||||
ExtMode::LQ => "lq".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn dst_size(&self) -> u8 {
|
||||
match self {
|
||||
ExtMode::BL => 4,
|
||||
ExtMode::BQ => 8,
|
||||
ExtMode::WL => 4,
|
||||
ExtMode::WQ => 8,
|
||||
ExtMode::LQ => 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ExtMode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
|
||||
#[derive(Clone)]
|
||||
pub enum ShiftKind {
|
||||
Left,
|
||||
RightZ,
|
||||
RightS,
|
||||
}
|
||||
|
||||
impl ShiftKind {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ShiftKind::Left => "shl".to_string(),
|
||||
ShiftKind::RightZ => "shr".to_string(),
|
||||
ShiftKind::RightS => "sar".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ShiftKind {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate condition code tests. Not all are represented since not all are useful in
|
||||
/// compiler-generated code.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(u8)]
|
||||
pub enum CC {
|
||||
/// overflow
|
||||
O = 0,
|
||||
/// no overflow
|
||||
NO = 1,
|
||||
|
||||
/// < unsigned
|
||||
B = 2,
|
||||
/// >= unsigned
|
||||
NB = 3,
|
||||
|
||||
/// zero
|
||||
Z = 4,
|
||||
/// not-zero
|
||||
NZ = 5,
|
||||
|
||||
/// <= unsigned
|
||||
BE = 6,
|
||||
/// > unsigned
|
||||
NBE = 7,
|
||||
|
||||
/// negative
|
||||
S = 8,
|
||||
/// not-negative
|
||||
NS = 9,
|
||||
|
||||
/// < signed
|
||||
L = 12,
|
||||
/// >= signed
|
||||
NL = 13,
|
||||
|
||||
/// <= signed
|
||||
LE = 14,
|
||||
/// > signed
|
||||
NLE = 15,
|
||||
}
|
||||
|
||||
impl CC {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
CC::O => "o".to_string(),
|
||||
CC::NO => "no".to_string(),
|
||||
CC::B => "b".to_string(),
|
||||
CC::NB => "nb".to_string(),
|
||||
CC::Z => "z".to_string(),
|
||||
CC::NZ => "nz".to_string(),
|
||||
CC::BE => "be".to_string(),
|
||||
CC::NBE => "nbe".to_string(),
|
||||
CC::S => "s".to_string(),
|
||||
CC::NS => "ns".to_string(),
|
||||
CC::L => "l".to_string(),
|
||||
CC::NL => "nl".to_string(),
|
||||
CC::LE => "le".to_string(),
|
||||
CC::NLE => "nle".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn invert(&self) -> CC {
|
||||
match self {
|
||||
CC::O => CC::NO,
|
||||
CC::NO => CC::O,
|
||||
|
||||
CC::B => CC::NB,
|
||||
CC::NB => CC::B,
|
||||
|
||||
CC::Z => CC::NZ,
|
||||
CC::NZ => CC::Z,
|
||||
|
||||
CC::BE => CC::NBE,
|
||||
CC::NBE => CC::BE,
|
||||
|
||||
CC::S => CC::NS,
|
||||
CC::NS => CC::S,
|
||||
|
||||
CC::L => CC::NL,
|
||||
CC::NL => CC::L,
|
||||
|
||||
CC::LE => CC::NLE,
|
||||
CC::NLE => CC::LE,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_enc(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for CC {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a MachLabel.
|
||||
Label(MachLabel),
|
||||
|
||||
/// A resolved reference to another instruction, in bytes.
|
||||
ResolvedOffset(isize),
|
||||
}
|
||||
|
||||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
BranchTarget::Label(l) => format!("{:?}", l),
|
||||
BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Get the label.
|
||||
pub fn as_label(&self) -> Option<MachLabel> {
|
||||
match self {
|
||||
&BranchTarget::Label(l) => Some(l),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a signed 32 bit byte offset. This returns the
|
||||
/// offset in bytes between the first byte of the source and the first
|
||||
/// byte of the target. It does not take into account the Intel-specific
|
||||
/// rule that a branch offset is encoded as relative to the start of the
|
||||
/// following instruction. That is a problem for the emitter to deal
|
||||
/// with. If a label, returns zero.
|
||||
pub fn as_offset32_or_zero(&self) -> i32 {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => {
|
||||
// Leave a bit of slack so that the emitter is guaranteed to
|
||||
// be able to add the length of the jump instruction encoding
|
||||
// to this value and still have a value in signed-32 range.
|
||||
assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
|
||||
off as i32
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
}
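The doc comment on as_offset32_or_zero stresses that the stored offset runs from the first byte of the branch to the first byte of the target, and that the emitter must still apply the x86 rule of encoding relative to the end of the instruction; a small worked example (standalone, with hypothetical numbers):

fn main() {
    // Offset from the first byte of the branch to the first byte of the
    // target, as BranchTarget::ResolvedOffset would carry it.
    let src_to_target: i32 = 32;
    // An unprefixed rel32 jcc is 6 bytes (0F 8x + imm32); x86 encodes the
    // displacement relative to the end of the instruction, so the emitter
    // subtracts the instruction length.
    let insn_len: i32 = 6;
    let encoded_rel32 = src_to_target - insn_len;
    assert_eq!(encoded_rel32, 26);
}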
|
|
@ -0,0 +1,892 @@
|
|||
use regalloc::{Reg, RegClass};
|
||||
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
fn low8willSXto64(x: u32) -> bool {
|
||||
let xs = (x as i32) as i64;
|
||||
xs == ((xs << 56) >> 56)
|
||||
}
|
||||
|
||||
fn low8willSXto32(x: u32) -> bool {
|
||||
let xs = x as i32;
|
||||
xs == ((xs << 24) >> 24)
|
||||
}
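A few spot checks of what these predicates accept (using a standalone copy of low8willSXto32, not part of the patch): values whose low byte sign-extends back to the full value pass; anything else does not:

fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let xs = x as i32;
    xs == ((xs << 24) >> 24)
}

fn main() {
    assert!(low8_will_sign_extend_to_32(0x0000_007f));  //  127
    assert!(low8_will_sign_extend_to_32(0xffff_ff80));  // -128
    assert!(!low8_will_sign_extend_to_32(0x0000_0080)); //  128 needs a wider imm
    assert!(!low8_will_sign_extend_to_32(0x0000_0100));
}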
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: emission
|
||||
|
||||
// For all of the routines that take both a memory-or-reg operand (sometimes
|
||||
// called "E" in the Intel documentation) and a reg-only operand ("G" in
|
||||
// Intelese), the order is always G first, then E.
|
||||
//
|
||||
// "enc" in the following means "hardware register encoding number".
|
||||
|
||||
#[inline(always)]
|
||||
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
|
||||
debug_assert!(m0d < 4);
|
||||
debug_assert!(encRegG < 8);
|
||||
debug_assert!(rmE < 8);
|
||||
((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
|
||||
debug_assert!(shift < 4);
|
||||
debug_assert!(encIndex < 8);
|
||||
debug_assert!(encBase < 8);
|
||||
((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
|
||||
}
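Both helpers just pack three bit-fields (2 + 3 + 3 bits) into one byte; a couple of concrete values (standalone sketch, not part of the patch):

fn mk_mod_reg_rm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
}

fn mk_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
}

fn main() {
    // mod=11 (register-direct), reg=001 (rcx), r/m=010 (rdx) -> 0xCA.
    assert_eq!(mk_mod_reg_rm(3, 1, 2), 0xCA);
    // scale=8 (shift 3), index=001 (rcx), base=101 (rbp) -> 0xCD.
    assert_eq!(mk_sib(3, 1, 5), 0xCD);
}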
|
||||
|
||||
/// Get the encoding number from something which we sincerely hope is a real
|
||||
/// register of class I64.
|
||||
#[inline(always)]
|
||||
fn iregEnc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
// F_*: these flags describe special handling of the insn to be generated. Be
|
||||
// careful with these. It is easy to create nonsensical combinations.
|
||||
const F_NONE: u32 = 0;
|
||||
|
||||
/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
|
||||
const F_RETAIN_REDUNDANT_REX: u32 = 1;
|
||||
|
||||
/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
|
||||
/// indicating a 64-bit operation.
|
||||
const F_CLEAR_REX_W: u32 = 2;
|
||||
|
||||
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
|
||||
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
|
||||
const F_PREFIX_66: u32 = 4;
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a register `encG` and a memory
|
||||
/// address `memE`, create and emit, first the REX prefix, then caller-supplied
|
||||
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
|
||||
/// optionally, a SIB byte, and finally optionally an immediate that will be
|
||||
/// derived from the `memE` operand. For most instructions up to and including
|
||||
/// SSE4.2, that will be the whole instruction.
|
||||
///
|
||||
/// The opcodes are written bigendianly for the convenience of callers. For
|
||||
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
|
||||
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware
|
||||
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
|
||||
/// By default, the REX prefix will indicate a 64-bit operation and will be
|
||||
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
|
||||
/// REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
opcodes: u32,
|
||||
mut numOpcodes: usize,
|
||||
encG: u8,
|
||||
memE: &Addr,
|
||||
flags: u32,
|
||||
) {
|
||||
// General comment for this function: the registers in `memE` must be
|
||||
// 64-bit integer registers, because they are part of an address
|
||||
// expression. But `encG` can be derived from a register of any class.
|
||||
let prefix66 = (flags & F_PREFIX_66) != 0;
|
||||
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
|
||||
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
|
||||
// The operand-size override, if requested. This indicates a 16-bit
|
||||
// operation.
|
||||
if prefix66 {
|
||||
sink.put1(0x66);
|
||||
}
|
||||
match memE {
|
||||
Addr::IR { simm32, base: regE } => {
|
||||
// First, cook up the REX byte. This is easy.
|
||||
let encE = iregEnc(*regE);
|
||||
let w = if clearRexW { 0 } else { 1 };
|
||||
let r = (encG >> 3) & 1;
|
||||
let x = 0;
|
||||
let b = (encE >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || retainRedundant {
|
||||
sink.put1(rex);
|
||||
}
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while numOpcodes > 0 {
|
||||
numOpcodes -= 1;
|
||||
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
// Now the mod/rm and associated immediates. This is
|
||||
// significantly complicated due to the multiple special cases.
|
||||
if *simm32 == 0
|
||||
&& encE != regs::ENC_RSP
|
||||
&& encE != regs::ENC_RBP
|
||||
&& encE != regs::ENC_R12
|
||||
&& encE != regs::ENC_R13
|
||||
{
|
||||
// FIXME JRS 2020Feb11: those four tests can surely be
|
||||
// replaced by a single mask-and-compare check. We should do
|
||||
// that because this routine is likely to be hot.
|
||||
sink.put1(mkModRegRM(0, encG & 7, encE & 7));
|
||||
} else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
|
||||
sink.put1(mkModRegRM(0, encG & 7, 4));
|
||||
sink.put1(0x24);
|
||||
} else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
|
||||
sink.put1(mkModRegRM(1, encG & 7, encE & 7));
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
|
||||
sink.put1(mkModRegRM(2, encG & 7, encE & 7));
|
||||
sink.put4(*simm32);
|
||||
} else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(mkModRegRM(1, encG & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
|
||||
//.. wait for test case for RSP case
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(mkModRegRM(2, encG & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
|
||||
}
|
||||
}
|
||||
// Bizarrely, the IRRS case is much simpler.
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base: regBase,
|
||||
index: regIndex,
|
||||
shift,
|
||||
} => {
|
||||
let encBase = iregEnc(*regBase);
|
||||
let encIndex = iregEnc(*regIndex);
|
||||
// The rex byte
|
||||
let w = if clearRexW { 0 } else { 1 };
|
||||
let r = (encG >> 3) & 1;
|
||||
let x = (encIndex >> 3) & 1;
|
||||
let b = (encBase >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || retainRedundant {
|
||||
sink.put1(rex);
|
||||
}
|
||||
// All other prefixes and opcodes
|
||||
while numOpcodes > 0 {
|
||||
numOpcodes -= 1;
|
||||
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
// modrm, SIB, immediates
|
||||
if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
|
||||
sink.put1(mkModRegRM(1, encG & 7, 4));
|
||||
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
|
||||
sink.put1(*simm32 as u8);
|
||||
} else if encIndex != regs::ENC_RSP {
|
||||
sink.put1(mkModRegRM(2, encG & 7, 4));
|
||||
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
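The REX byte built in both address forms above is just 0x40 plus the W, R, X and B bits; one concrete case (standalone sketch, not part of the patch):

fn rex(w: u8, r: u8, x: u8, b: u8) -> u8 {
    0x40 | (w << 3) | (r << 2) | (x << 1) | b
}

fn main() {
    // 64-bit op (W=1) with G = r9 (encoding 9, so R=1), a low-group base
    // register (B=0) and no index (X=0): REX.WR = 0x4C.
    assert_eq!(rex(1, (9 >> 3) & 1, 0, 0), 0x4C);
    // A pure low-register 32-bit op yields the redundant 0x40, which the
    // code above drops unless F_RETAIN_REDUNDANT_REX is set.
    assert_eq!(rex(0, 0, 0, 0), 0x40);
}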
|
||||
|
||||
/// This is the core 'emit' function for instructions that do not reference
|
||||
/// memory.
|
||||
///
|
||||
/// This is conceptually the same as
|
||||
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
|
||||
/// where the E operand is a register rather than memory. Hence it is much
|
||||
/// simpler.
|
||||
fn emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
opcodes: u32,
|
||||
mut numOpcodes: usize,
|
||||
encG: u8,
|
||||
encE: u8,
|
||||
flags: u32,
|
||||
) {
|
||||
// EncG and EncE can be derived from registers of any class, and they
|
||||
// don't even have to be from the same class. For example, for an
|
||||
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
|
||||
// RegClass::V128.
|
||||
let prefix66 = (flags & F_PREFIX_66) != 0;
|
||||
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
|
||||
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
|
||||
// The operand-size override
|
||||
if prefix66 {
|
||||
sink.put1(0x66);
|
||||
}
|
||||
// The rex byte
|
||||
let w = if clearRexW { 0 } else { 1 };
|
||||
let r = (encG >> 3) & 1;
|
||||
let x = 0;
|
||||
let b = (encE >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || retainRedundant {
|
||||
sink.put1(rex);
|
||||
}
|
||||
// All other prefixes and opcodes
|
||||
while numOpcodes > 0 {
|
||||
numOpcodes -= 1;
|
||||
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
// Now the mod/rm byte. The instruction we're generating doesn't access
|
||||
// memory, so there is no SIB byte or immediate -- we're done.
|
||||
sink.put1(mkModRegRM(3, encG & 7, encE & 7));
|
||||
}
|
||||
|
||||
// These are merely wrappers for the above two functions that facilitate passing
|
||||
// actual `Reg`s rather than their encodings.
|
||||
|
||||
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
opcodes: u32,
|
||||
numOpcodes: usize,
|
||||
regG: Reg,
|
||||
memE: &Addr,
|
||||
flags: u32,
|
||||
) {
|
||||
// JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
|
||||
let encG = iregEnc(regG);
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
|
||||
}
|
||||
|
||||
fn emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
opcodes: u32,
|
||||
numOpcodes: usize,
|
||||
regG: Reg,
|
||||
regE: Reg,
|
||||
flags: u32,
|
||||
) {
|
||||
// JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
|
||||
let encG = iregEnc(regG);
|
||||
let encE = iregEnc(regE);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
|
||||
}
|
||||
|
||||
/// Write a suitable number of bits from an imm64 to the sink.
|
||||
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
||||
match size {
|
||||
8 | 4 => sink.put4(simm32),
|
||||
2 => sink.put2(simm32 as u16),
|
||||
1 => sink.put1(simm32 as u8),
|
||||
_ => panic!("x64::Inst::emit_simm: unreachable"),
|
||||
}
|
||||
}
|
||||
|
||||
/// The top-level emit function.
|
||||
///
|
||||
/// Important! Do not add improved (shortened) encoding cases to existing
|
||||
/// instructions without also adding tests for those improved encodings. That
|
||||
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
|
||||
/// code.
|
||||
///
|
||||
/// For all instructions, make sure to have test coverage for all of the
|
||||
/// following situations. Do this by creating the cross product resulting from
|
||||
/// applying the following rules to each operand:
|
||||
///
|
||||
/// (1) for any insn that mentions a register: one test using a register from
|
||||
/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
|
||||
/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
|
||||
/// This helps detect incorrect REX prefix construction.
|
||||
///
|
||||
/// (2) for any insn that mentions a byte register: one test for each of the
|
||||
/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
|
||||
/// [r8b .. r11b] and [r12b .. r15b]. This checks that
|
||||
/// apparently-redundant REX prefixes are retained when required.
|
||||
///
|
||||
/// (3) for any insn that contains an immediate field, check the following
|
||||
/// cases: field is zero, field is in simm8 range (-128 .. 127), field is
|
||||
/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
|
||||
/// instructions that require a 32-bit immediate have a short-form encoding
|
||||
/// when the imm is in simm8 range.
|
||||
///
|
||||
/// Rules (1), (2) and (3) don't apply for registers within address expressions
|
||||
/// (`Addr`s). Those are already pretty well tested, and the registers in them
|
||||
/// don't have any effect on the containing instruction (apart from possibly
|
||||
/// require REX prefix bits).
|
||||
///
|
||||
/// When choosing registers for a test, avoid using registers with the same
|
||||
/// offset within a given group. For example, don't use rax and r8, since they
|
||||
/// both have the lowest 3 bits as 000, and so the test won't detect errors
|
||||
/// where those 3-bit register sub-fields are confused by the emitter. Instead
|
||||
/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
|
||||
/// and bpl since they have the same offset in their group; use instead (eg) cl
|
||||
/// and sil.
|
||||
///
|
||||
/// For all instructions, also add a test that uses only low-half registers
|
||||
/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
|
||||
/// prefixes are correctly omitted. This low-half restriction must apply to
|
||||
/// _all_ registers in the insn, even those in address expressions.
|
||||
///
|
||||
/// Following these rules creates large numbers of test cases, but it's the
|
||||
/// only way to make the emitter reliable.
|
||||
///
|
||||
/// Known possible improvements:
|
||||
///
|
||||
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
|
||||
/// care?)
|
||||
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
match inst {
|
||||
Inst::Nop { len: 0 } => {}
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
if *op == RMI_R_Op::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
*regE,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x6B } else { 0x69 };
|
||||
// Yes, really, regG twice.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (opcode_R, opcode_M, subopcode_I) = match op {
|
||||
RMI_R_Op::Add => (0x01, 0x03, 0),
|
||||
RMI_R_Op::Sub => (0x29, 0x2B, 5),
|
||||
RMI_R_Op::And => (0x21, 0x23, 4),
|
||||
RMI_R_Op::Or => (0x09, 0x0B, 1),
|
||||
RMI_R_Op::Xor => (0x31, 0x33, 6),
|
||||
RMI_R_Op::Mul => panic!("unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
// Note. The arguments .. regE .. regG .. sequence
|
||||
// here is the opposite of what is expected. I'm not
|
||||
// sure why this is. But I am fairly sure that the
|
||||
// arg order could be switched back to the expected
|
||||
// .. regG .. regE .. if opcode_rr is also switched
|
||||
// over to the "other" basic integer opcode (viz, the
|
||||
// R/RM vs RM/R duality). However, that would mean
|
||||
// that the test results won't be in accordance with
|
||||
// the GNU as reference output. In other words, the
|
||||
// inversion exists as a result of using GNU as as a
|
||||
// gold standard.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode_R,
|
||||
1,
|
||||
*regE,
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
// NB: if this is ever extended to handle byte size
|
||||
// ops, be sure to retain redundant REX prefixes.
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
opcode_M,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x83 } else { 0x81 };
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(regG.to_reg());
|
||||
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if *dst_is_64 {
|
||||
// FIXME JRS 2020Feb10: also use the 32-bit case here when
|
||||
// possible
|
||||
sink.put1(0x48 | ((encDst >> 3) & 1));
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put8(*simm64);
|
||||
} else {
|
||||
if ((encDst >> 3) & 1) == 1 {
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put4(*simm64 as u32);
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_R { is_64, src, dst } => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
|
||||
}
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVZBQ is (REX.W==1) 0F B6 /r
|
||||
// I'm not sure why the Intel manual offers different
|
||||
// encodings for MOVZBQ than for MOVZBL. AIUI they should
|
||||
// achieve the same, since MOVZBL is just going to zero out
|
||||
// the upper half of the destination anyway.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVZWL is (REX.W==0) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVZWQ is (REX.W==1) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// This is just a standard 32 bit load, and we rely on the
|
||||
// default zero-extension rule to perform the extension.
|
||||
// MOV r/m32, r32 is (REX.W==0) 8B /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
|
||||
}
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVSBQ is (REX.W==1) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVSWL is (REX.W==0) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVSWQ is (REX.W==1) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// MOVSLQ is (REX.W==1) 63 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x63,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_M { size, src, addr } => {
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
// redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encSrc = iregEnc(*src);
|
||||
let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
|
||||
F_RETAIN_REDUNDANT_REX
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x88,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | retainRedundantRex,
|
||||
)
|
||||
}
|
||||
2 => {
|
||||
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | F_PREFIX_66,
|
||||
)
|
||||
}
|
||||
4 => {
|
||||
// MOV r32, r/m32 is (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
8 => {
|
||||
// MOV r64, r/m64 is (REX.W==1) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
|
||||
}
|
||||
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
|
||||
}
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
};
|
||||
match num_bits {
|
||||
None => {
|
||||
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
|
||||
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xD3,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
}
|
||||
Some(num_bits) => {
|
||||
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
|
||||
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
|
||||
// When the shift amount is 1, there's an even shorter encoding, but we don't
|
||||
// bother with that nicety here.
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xC1,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
sink.put1(*num_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let mut retainRedundantRex = 0;
|
||||
if *size == 1 {
|
||||
// Here, a redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encG = iregEnc(*regG);
|
||||
if encG >= 4 && encG <= 7 {
|
||||
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
let mut flags = match size {
|
||||
8 => F_NONE,
|
||||
4 => F_CLEAR_REX_W,
|
||||
2 => F_CLEAR_REX_W | F_PREFIX_66,
|
||||
1 => F_CLEAR_REX_W | retainRedundantRex,
|
||||
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
let opcode = if *size == 1 { 0x38 } else { 0x39 };
|
||||
if *size == 1 {
|
||||
// We also need to check whether the E register forces
|
||||
// the use of a redundant REX.
|
||||
let encE = iregEnc(*regE);
|
||||
if encE >= 4 && encE <= 7 {
|
||||
flags |= F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
// Same comment re swapped args as for Alu_RMI_R.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
// FIXME JRS 2020Feb11: there are shorter encodings for
|
||||
// cmp $imm, rax/eax/ax/al.
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if *size == 1 {
|
||||
0x80
|
||||
} else if useImm8 {
|
||||
0x83
|
||||
} else {
|
||||
0x81
|
||||
};
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(*regG);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink, opcode, 1, 7, /*subopcode*/
|
||||
encG, flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
match src {
|
||||
RMI::R { reg } => {
|
||||
let encReg = iregEnc(*reg);
|
||||
let rex = 0x40 | ((encReg >> 3) & 1);
|
||||
if rex != 0x40 {
|
||||
sink.put1(rex);
|
||||
}
|
||||
sink.put1(0x50 | (encReg & 7));
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
6, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
if low8willSXto64(*simm32) {
|
||||
sink.put1(0x6A);
|
||||
sink.put1(*simm32 as u8);
|
||||
} else {
|
||||
sink.put1(0x68);
|
||||
sink.put4(*simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if encDst >= 8 {
|
||||
// 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
|
||||
// here.
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0x58 + (encDst & 7));
|
||||
}
|
||||
//
|
||||
// ** Inst::CallKnown
|
||||
//
|
||||
Inst::CallUnknown { dest } => {
|
||||
match dest {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Ret {} => sink.put1(0xC3),
|
||||
|
||||
Inst::JmpKnown { dest } => {
|
||||
let disp = dest.as_offset32_or_zero() - 5;
|
||||
let disp = disp as u32;
|
||||
let br_start = sink.cur_offset();
|
||||
let br_disp_off = br_start + 1;
|
||||
let br_end = br_start + 5;
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(br_start, br_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(disp);
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => {
|
||||
// Conditional part.
|
||||
|
||||
// This insn is 6 bytes long. Currently `offset` is relative to
|
||||
// the start of this insn, but the Intel encoding requires it to
|
||||
// be relative to the start of the next instruction. Hence the
|
||||
// adjustment.
|
||||
let taken_disp = taken.as_offset32_or_zero() - 6;
|
||||
let taken_disp = taken_disp as u32;
|
||||
let cond_start = sink.cur_offset();
|
||||
let cond_disp_off = cond_start + 2;
|
||||
let cond_end = cond_start + 6;
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
|
||||
let inverted: [u8; 6] =
|
||||
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
|
||||
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
|
||||
}
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0x80 + cc.get_enc());
|
||||
sink.put4(taken_disp);
|
||||
|
||||
// Unconditional part.
|
||||
|
||||
let nt_disp = not_taken.as_offset32_or_zero() - 5;
|
||||
let nt_disp = nt_disp as u32;
|
||||
let uncond_start = sink.cur_offset();
|
||||
let uncond_disp_off = uncond_start + 1;
|
||||
let uncond_end = uncond_start + 5;
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(uncond_start, uncond_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(nt_disp);
|
||||
}
|
||||
Inst::JmpUnknown { target } => {
|
||||
match target {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
|
||||
}
|
||||
}
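// Illustrative sketch only (not part of this patch): the REX prefix assumed by
// the helpers above has the layout 0b0100WRXB, i.e. 0x40 | W<<3 | R<<2 | X<<1 | B,
// where W requests a 64-bit operand size and R/X/B extend the ModRM reg field,
// the SIB index, and the ModRM r/m (or SIB base) field respectively. The
// hypothetical helper below only shows that bit layout; the real encoder above
// additionally decides when a bare 0x40 byte may be dropped (F_CLEAR_REX_W) and
// when an apparently-redundant REX must be retained for the spl/bpl/sil/dil
// byte registers (F_RETAIN_REDUNDANT_REX).
fn rex_byte_sketch(w: bool, r: u8, x: u8, b: u8) -> u8 {
    0x40 | ((w as u8) << 3) | ((r & 1) << 2) | ((x & 1) << 1) | (b & 1)
}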
|
|
@@ -0,0 +1,905 @@
|
|||
//! This module defines x86_64-specific machine instruction types.
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(non_snake_case)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::SmallVec;
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::RegUsageCollector;
|
||||
use regalloc::Set;
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
|
||||
use crate::ir::ExternalName;
|
||||
use crate::ir::Type;
|
||||
use crate::machinst::*;
|
||||
use crate::settings::Flags;
|
||||
use crate::{settings, CodegenError, CodegenResult};
|
||||
|
||||
pub mod args;
|
||||
mod emit;
|
||||
#[cfg(test)]
|
||||
mod emit_tests;
|
||||
pub mod regs;
|
||||
|
||||
use args::*;
|
||||
use regs::{create_reg_universe_systemv, show_ireg_sized};
|
||||
|
||||
//=============================================================================
|
||||
// Instructions (top level): definition
|
||||
|
||||
// Don't build these directly. Instead use the Inst:: functions to create them.
|
||||
|
||||
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Inst {
|
||||
/// nops of various sizes, including zero
|
||||
Nop { len: u8 },
|
||||
|
||||
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
Alu_RMI_R {
|
||||
is_64: bool,
|
||||
op: RMI_R_Op,
|
||||
src: RMI,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// (imm32 imm64) reg.
|
||||
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64
|
||||
Imm_R {
|
||||
dst_is_64: bool,
|
||||
simm64: u64,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (64 32) reg reg
|
||||
Mov_R_R {
|
||||
is_64: bool,
|
||||
src: Reg,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
|
||||
/// Note that the lq variant doesn't really exist since the default
|
||||
/// zero-extend rule makes it unnecessary. For that case we emit the
|
||||
/// equivalent "movl AM, reg32".
|
||||
MovZX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
|
||||
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
|
||||
|
||||
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
|
||||
MovSX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (b w l q) reg addr (good for all integer stores)
|
||||
Mov_R_M {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
},
|
||||
|
||||
/// (shl shr sar) (l q) imm reg
|
||||
Shift_R {
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// cmp (b w l q) (reg addr imm) reg
|
||||
Cmp_RMI_R {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
},
|
||||
|
||||
/// pushq (reg addr imm)
|
||||
Push64 { src: RMI },
|
||||
|
||||
/// popq reg
|
||||
Pop64 { dst: Writable<Reg> },
|
||||
|
||||
/// call simm32
|
||||
CallKnown {
|
||||
dest: ExternalName,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
/// callq (reg mem)
|
||||
CallUnknown {
|
||||
dest: RM,
|
||||
//uses: Set<Reg>,
|
||||
//defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
// ---- branches (exactly one must appear at end of BB) ----
|
||||
/// ret
|
||||
Ret,
|
||||
|
||||
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
|
||||
/// inserted there.
|
||||
EpiloguePlaceholder,
|
||||
|
||||
/// jmp simm32
|
||||
JmpKnown { dest: BranchTarget },
|
||||
|
||||
/// jcond cond target target
|
||||
/// Symmetrical two-way conditional branch.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it
|
||||
/// as appropriate.
|
||||
JmpCondSymm {
|
||||
cc: CC,
|
||||
taken: BranchTarget,
|
||||
not_taken: BranchTarget,
|
||||
},
|
||||
|
||||
/// jmpq (reg mem)
|
||||
JmpUnknown { target: RM },
|
||||
}
|
||||
|
||||
// Handy constructors for Insts.
|
||||
|
||||
// For various sizes, will some number of lowest bits sign extend to be the
|
||||
// same as the whole value?
|
||||
pub(crate) fn low32willSXto64(x: u64) -> bool {
|
||||
let xs = x as i64;
|
||||
xs == ((xs << 32) >> 32)
|
||||
}
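// Illustrative examples only (an added test sketch, not part of this patch):
// a u64 passes the check above exactly when bits 32..63 are all copies of
// bit 31.
#[test]
fn low32willSXto64_examples() {
    assert!(low32willSXto64(0));
    assert!(low32willSXto64(0x0000_0000_7FFF_FFFF));
    assert!(low32willSXto64(0xFFFF_FFFF_8000_0000));
    assert!(!low32willSXto64(0x0000_0001_0000_0000));
}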
|
||||
|
||||
impl Inst {
|
||||
pub(crate) fn nop(len: u8) -> Self {
|
||||
debug_assert!(len <= 16);
|
||||
Self::Nop { len }
|
||||
}
|
||||
|
||||
pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Self::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
if !dst_is_64 {
|
||||
debug_assert!(low32willSXto64(simm64));
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov_R_R { is_64, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovZX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov64_M_R { addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_m(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
Inst::Mov_R_M { size, src, addr }
|
||||
}
|
||||
|
||||
pub(crate) fn shift_r(
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
) -> Inst {
|
||||
debug_assert!(if let Some(num_bits) = num_bits {
|
||||
num_bits < if is_64 { 64 } else { 32 }
|
||||
} else {
|
||||
true
|
||||
});
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn cmp_rmi_r(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(dst.get_class() == RegClass::I64);
|
||||
Inst::Cmp_RMI_R { size, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn push64(src: RMI) -> Inst {
|
||||
Inst::Push64 { src }
|
||||
}
|
||||
|
||||
pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
|
||||
Inst::Pop64 { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn call_unknown(dest: RM) -> Inst {
|
||||
Inst::CallUnknown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn ret() -> Inst {
|
||||
Inst::Ret
|
||||
}
|
||||
|
||||
pub(crate) fn epilogue_placeholder() -> Inst {
|
||||
Inst::EpiloguePlaceholder
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
|
||||
Inst::JmpKnown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_unknown(target: RM) -> Inst {
|
||||
Inst::JmpUnknown { target }
|
||||
}
|
||||
}
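// Illustrative sketch only (not part of this patch): how the constructors
// above compose into a small sequence, roughly "movl $7, %eax; addq %rcx, %rax".
// The register choices are arbitrary placeholders.
fn example_inst_sequence() -> SmallVec<[Inst; 2]> {
    let dst = Writable::<Reg>::from_reg(regs::rax());
    let mut insts = SmallVec::new();
    insts.push(Inst::imm_r(false, 7, dst));
    insts.push(Inst::alu_rmi_r(true, RMI_R_Op::Add, RMI::reg(regs::rcx()), dst));
    insts
}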
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: printing
|
||||
|
||||
impl ShowWithRRU for Inst {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
fn ljustify(s: String) -> String {
|
||||
let w = 7;
|
||||
if s.len() >= w {
|
||||
s
|
||||
} else {
|
||||
let need = usize::min(w, w - s.len());
|
||||
s + &format!("{nil: <width$}", nil = "", width = need)
|
||||
}
|
||||
}
|
||||
|
||||
fn ljustify2(s1: String, s2: String) -> String {
|
||||
ljustify(s1 + &s2)
|
||||
}
|
||||
|
||||
fn suffixLQ(is_64: bool) -> String {
|
||||
(if is_64 { "q" } else { "l" }).to_string()
|
||||
}
|
||||
|
||||
fn sizeLQ(is_64: bool) -> u8 {
|
||||
if is_64 {
|
||||
8
|
||||
} else {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
fn suffixBWLQ(size: u8) -> String {
|
||||
match size {
|
||||
1 => "b".to_string(),
|
||||
2 => "w".to_string(),
|
||||
4 => "l".to_string(),
|
||||
8 => "q".to_string(),
|
||||
_ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
|
||||
}
|
||||
}
|
||||
|
||||
match self {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
} => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2(op.to_string(), suffixLQ(*is_64)),
|
||||
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
|
||||
),
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
if *dst_is_64 {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movabsq".to_string()),
|
||||
*simm64 as i64,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
(*simm64 as u32) as i32,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_R { is_64, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
if *extMode == ExtMode::LQ {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movz".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movq".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movs".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
),
|
||||
Inst::Mov_R_M { size, src, addr } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixBWLQ(*size)),
|
||||
show_ireg_sized(*src, mb_rru, *size),
|
||||
addr.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => match num_bits {
|
||||
None => format!(
|
||||
"{} %cl, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
|
||||
Some(num_bits) => format!(
|
||||
"{} ${}, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
num_bits,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
},
|
||||
Inst::Cmp_RMI_R { size, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("cmp".to_string(), suffixBWLQ(*size)),
|
||||
src.show_rru_sized(mb_rru, *size),
|
||||
show_ireg_sized(*dst, mb_rru, *size)
|
||||
),
|
||||
Inst::Push64 { src } => {
|
||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
|
||||
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
|
||||
Inst::CallUnknown { dest } => format!(
|
||||
"{} *{}",
|
||||
ljustify("call".to_string()),
|
||||
dest.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Ret => "ret".to_string(),
|
||||
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
|
||||
Inst::JmpKnown { dest } => {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => format!(
|
||||
"{} taken={} not_taken={}",
|
||||
ljustify2("j".to_string(), cc.to_string()),
|
||||
taken.show_rru(mb_rru),
|
||||
not_taken.show_rru(mb_rru)
|
||||
),
|
||||
//
|
||||
Inst::JmpUnknown { target } => format!(
|
||||
"{} *{}",
|
||||
ljustify("jmp".to_string()),
|
||||
target.show_rru(mb_rru)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Temp hook for legacy printing machinery
|
||||
impl fmt::Debug for Inst {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
// Print the insn without a Universe :-(
|
||||
write!(fmt, "{}", self.show_rru(None))
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// This is a bit subtle. If some register is in the modified set, then it may not be in either
|
||||
// the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
|
||||
// regalloc.rs will "fix" this for us by removing the modified set from the use and def
|
||||
// sets.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
src,
|
||||
dst,
|
||||
} => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
dst,
|
||||
} => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_R { is_64: _, src, dst } => {
|
||||
collector.add_use(*src);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_M { size: _, src, addr } => {
|
||||
collector.add_use(*src);
|
||||
addr.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
if num_bits.is_none() {
|
||||
collector.add_use(regs::rcx());
|
||||
}
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R { size: _, src, dst } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_use(*dst); // yes, really `add_use`
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(Writable::from_reg(regs::rsp()));
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {
|
||||
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
|
||||
unimplemented!();
|
||||
}
|
||||
Inst::CallUnknown { dest } => {
|
||||
dest.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.get_regs_as_uses(collector);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: map_regs
|
||||
|
||||
fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
|
||||
if r.is_virtual() {
|
||||
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
|
||||
*r = new;
|
||||
}
|
||||
}
|
||||
|
||||
fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
Addr::IR {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
} => map_use(map, base),
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
ref mut index,
|
||||
shift: _,
|
||||
} => {
|
||||
map_use(map, base);
|
||||
map_use(map, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RMI::R { ref mut reg } => map_use(map, reg),
|
||||
RMI::M { ref mut addr } => addr.map_uses(map),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RM {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RM::R { ref mut reg } => map_use(map, reg),
|
||||
RM::M { ref mut addr } => addr.map_uses(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
// Note this must be carefully synchronized with x64_get_regs.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
ref mut dst,
|
||||
} => map_def(mapper, dst),
|
||||
Inst::Mov_R_R {
|
||||
is_64: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov_R_M {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut addr,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
addr.map_uses(mapper);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits: _,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_use(mapper, dst);
|
||||
}
|
||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||
Inst::Pop64 { ref mut dst } => {
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {}
|
||||
Inst::CallUnknown { dest } => dest.map_uses(mapper),
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.apply_map(mapper);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: misc functions and external interface
|
||||
|
||||
impl MachInst for Inst {
|
||||
fn get_regs(&self, collector: &mut RegUsageCollector) {
|
||||
x64_get_regs(&self, collector)
|
||||
}
|
||||
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||
x64_map_regs(self, mapper);
|
||||
}
|
||||
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
|
||||
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
|
||||
// out the upper 32 bits of the destination. For example, we could
|
||||
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
|
||||
// %reg.
|
||||
match self {
|
||||
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_epilogue_placeholder(&self) -> bool {
|
||||
if let Self::EpiloguePlaceholder = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
|
||||
match self {
|
||||
// Interesting cases.
|
||||
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
|
||||
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
|
||||
&Self::JmpCondSymm {
|
||||
cc: _,
|
||||
taken,
|
||||
not_taken,
|
||||
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
|
||||
// All other cases are boring.
|
||||
_ => MachTerminator::None,
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
|
||||
let rc_dst = dst_reg.to_reg().get_class();
|
||||
let rc_src = src_reg.get_class();
|
||||
// If this isn't true, we have gone way off the rails.
|
||||
debug_assert!(rc_dst == rc_src);
|
||||
match rc_dst {
|
||||
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
|
||||
_ => panic!("gen_move(x64): unhandled regclass"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_zero_len_nop() -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_nop(_preferred_size: usize) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
|
||||
match ty {
|
||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
ty
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_jump(label: MachLabel) -> Inst {
|
||||
Inst::jmp_known(BranchTarget::Label(label))
|
||||
}
|
||||
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
|
||||
let mut ret = SmallVec::new();
|
||||
let is64 = value > 0xffff_ffff;
|
||||
ret.push(Inst::imm_r(is64, value, to_reg));
|
||||
ret
|
||||
}
|
||||
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse {
|
||||
create_reg_universe_systemv(flags)
|
||||
}
|
||||
|
||||
fn worst_case_size() -> CodeOffset {
|
||||
15
|
||||
}
|
||||
|
||||
type LabelUse = LabelUse;
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = ();
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
|
||||
emit::emit(self, sink);
|
||||
}
|
||||
}
|
||||
|
||||
/// A label-use (internal relocation) in generated code.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum LabelUse {
|
||||
/// A 32-bit offset from location of relocation itself, added to the
|
||||
/// existing value at that location.
|
||||
Rel32,
|
||||
}
|
||||
|
||||
impl MachInstLabelUse for LabelUse {
|
||||
const ALIGN: CodeOffset = 1;
|
||||
|
||||
fn max_pos_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x7fff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_neg_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x8000_0000,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 4,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
|
||||
let value = i32::try_from(label_offset)
|
||||
.unwrap()
|
||||
.wrapping_sub(i32::try_from(use_offset).unwrap())
|
||||
.wrapping_add(addend);
|
||||
buffer.copy_from_slice(&value.to_le_bytes()[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn supports_veneer(self) -> bool {
|
||||
match self {
|
||||
LabelUse::Rel32 => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn veneer_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
panic!("Veneer not supported for Rel32 label-use.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,261 @@
|
|||
//! Registers, the Universe thereof, and printing.
|
||||
//!
|
||||
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
|
||||
//! intended to make callee-save registers available before caller-saved ones. This is a net win
|
||||
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
|
||||
//! functions, and we should change the ordering in that case, so as to make caller-save regs
|
||||
//! available first.
|
||||
//!
|
||||
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
|
||||
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
|
||||
//! for each function we compile.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use std::string::String;
|
||||
|
||||
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
|
||||
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::settings;
|
||||
|
||||
// Hardware encodings for a few registers.
|
||||
|
||||
pub const ENC_RBX: u8 = 3;
|
||||
pub const ENC_RSP: u8 = 4;
|
||||
pub const ENC_RBP: u8 = 5;
|
||||
pub const ENC_R12: u8 = 12;
|
||||
pub const ENC_R13: u8 = 13;
|
||||
pub const ENC_R14: u8 = 14;
|
||||
pub const ENC_R15: u8 = 15;
|
||||
|
||||
fn gpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::I64, enc, index)
|
||||
}
|
||||
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 0)
|
||||
}
|
||||
pub(crate) fn r13() -> Reg {
|
||||
gpr(ENC_R13, 1)
|
||||
}
|
||||
pub(crate) fn r14() -> Reg {
|
||||
gpr(ENC_R14, 2)
|
||||
}
|
||||
pub(crate) fn r15() -> Reg {
|
||||
gpr(ENC_R15, 3)
|
||||
}
|
||||
pub(crate) fn rbx() -> Reg {
|
||||
gpr(ENC_RBX, 4)
|
||||
}
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 5)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 6)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 7)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 8)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 9)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 10)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 11)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 12)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 13)
|
||||
}
|
||||
|
||||
fn fpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::V128, enc, index)
|
||||
}
|
||||
fn xmm0() -> Reg {
|
||||
fpr(0, 14)
|
||||
}
|
||||
fn xmm1() -> Reg {
|
||||
fpr(1, 15)
|
||||
}
|
||||
fn xmm2() -> Reg {
|
||||
fpr(2, 16)
|
||||
}
|
||||
fn xmm3() -> Reg {
|
||||
fpr(3, 17)
|
||||
}
|
||||
fn xmm4() -> Reg {
|
||||
fpr(4, 18)
|
||||
}
|
||||
fn xmm5() -> Reg {
|
||||
fpr(5, 19)
|
||||
}
|
||||
fn xmm6() -> Reg {
|
||||
fpr(6, 20)
|
||||
}
|
||||
fn xmm7() -> Reg {
|
||||
fpr(7, 21)
|
||||
}
|
||||
fn xmm8() -> Reg {
|
||||
fpr(8, 22)
|
||||
}
|
||||
fn xmm9() -> Reg {
|
||||
fpr(9, 23)
|
||||
}
|
||||
fn xmm10() -> Reg {
|
||||
fpr(10, 24)
|
||||
}
|
||||
fn xmm11() -> Reg {
|
||||
fpr(11, 25)
|
||||
}
|
||||
fn xmm12() -> Reg {
|
||||
fpr(12, 26)
|
||||
}
|
||||
fn xmm13() -> Reg {
|
||||
fpr(13, 27)
|
||||
}
|
||||
fn xmm14() -> Reg {
|
||||
fpr(14, 28)
|
||||
}
|
||||
fn xmm15() -> Reg {
|
||||
fpr(15, 29)
|
||||
}
|
||||
|
||||
pub(crate) fn rsp() -> Reg {
|
||||
gpr(ENC_RSP, 30)
|
||||
}
|
||||
pub(crate) fn rbp() -> Reg {
|
||||
gpr(ENC_RBP, 31)
|
||||
}
|
||||
|
||||
/// Create the register universe for X64.
|
||||
///
|
||||
/// The ordering of registers matters, as commented in the file doc comment: assumes the
|
||||
/// calling-convention is SystemV, at the moment.
|
||||
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
|
||||
let mut regs = Vec::<(RealReg, String)>::new();
|
||||
let mut allocable_by_class = [None; NUM_REG_CLASSES];
|
||||
|
||||
// Integer regs.
|
||||
let mut base = regs.len();
|
||||
|
||||
// Callee-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((r12().to_real_reg(), "%r12".into()));
|
||||
regs.push((r13().to_real_reg(), "%r13".into()));
|
||||
regs.push((r14().to_real_reg(), "%r14".into()));
|
||||
regs.push((r15().to_real_reg(), "%r15".into()));
|
||||
regs.push((rbx().to_real_reg(), "%rbx".into()));
|
||||
|
||||
// Caller-saved, in the SystemV x86_64 ABI.
|
||||
regs.push((rsi().to_real_reg(), "%rsi".into()));
|
||||
regs.push((rdi().to_real_reg(), "%rdi".into()));
|
||||
regs.push((rax().to_real_reg(), "%rax".into()));
|
||||
regs.push((rcx().to_real_reg(), "%rcx".into()));
|
||||
regs.push((rdx().to_real_reg(), "%rdx".into()));
|
||||
regs.push((r8().to_real_reg(), "%r8".into()));
|
||||
regs.push((r9().to_real_reg(), "%r9".into()));
|
||||
regs.push((r10().to_real_reg(), "%r10".into()));
|
||||
regs.push((r11().to_real_reg(), "%r11".into()));
|
||||
|
||||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
suggested_scratch: Some(r12().get_index()),
|
||||
});
|
||||
|
||||
// XMM registers
|
||||
base = regs.len();
|
||||
regs.push((xmm0().to_real_reg(), "%xmm0".into()));
|
||||
regs.push((xmm1().to_real_reg(), "%xmm1".into()));
|
||||
regs.push((xmm2().to_real_reg(), "%xmm2".into()));
|
||||
regs.push((xmm3().to_real_reg(), "%xmm3".into()));
|
||||
regs.push((xmm4().to_real_reg(), "%xmm4".into()));
|
||||
regs.push((xmm5().to_real_reg(), "%xmm5".into()));
|
||||
regs.push((xmm6().to_real_reg(), "%xmm6".into()));
|
||||
regs.push((xmm7().to_real_reg(), "%xmm7".into()));
|
||||
regs.push((xmm8().to_real_reg(), "%xmm8".into()));
|
||||
regs.push((xmm9().to_real_reg(), "%xmm9".into()));
|
||||
regs.push((xmm10().to_real_reg(), "%xmm10".into()));
|
||||
regs.push((xmm11().to_real_reg(), "%xmm11".into()));
|
||||
regs.push((xmm12().to_real_reg(), "%xmm12".into()));
|
||||
regs.push((xmm13().to_real_reg(), "%xmm13".into()));
|
||||
regs.push((xmm14().to_real_reg(), "%xmm14".into()));
|
||||
regs.push((xmm15().to_real_reg(), "%xmm15".into()));
|
||||
|
||||
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
suggested_scratch: Some(xmm15().get_index()),
|
||||
});
|
||||
|
||||
// Other regs, not available to the allocator.
|
||||
let allocable = regs.len();
|
||||
regs.push((rsp().to_real_reg(), "%rsp".into()));
|
||||
regs.push((rbp().to_real_reg(), "%rbp".into()));
|
||||
|
||||
RealRegUniverse {
|
||||
regs,
|
||||
allocable,
|
||||
allocable_by_class,
|
||||
}
|
||||
}
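// Illustrative test sketch only (not part of this patch): the universe built
// above exposes 14 integer and 16 XMM registers to the allocator, with %rsp
// and %rbp appended as non-allocatable.
#[test]
fn universe_layout() {
    let flags = settings::Flags::new(settings::builder());
    let rru = create_reg_universe_systemv(&flags);
    assert_eq!(rru.allocable, 30);
    assert_eq!(rru.regs.len(), 32);
}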
|
||||
|
||||
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
|
||||
/// smaller size (4, 2 or 1 bytes).
|
||||
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
if reg.get_class() != RegClass::I64 || size == 8 {
|
||||
// We can't do any better.
|
||||
return s;
|
||||
}
|
||||
|
||||
if reg.is_real() {
|
||||
// Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
|
||||
// describe diplomatically as "a kludge", but it's only debug code.
|
||||
let remapper = match s.as_str() {
|
||||
"%rax" => Some(["%eax", "%ax", "%al"]),
|
||||
"%rbx" => Some(["%ebx", "%bx", "%bl"]),
|
||||
"%rcx" => Some(["%ecx", "%cx", "%cl"]),
|
||||
"%rdx" => Some(["%edx", "%dx", "%dl"]),
|
||||
"%rsi" => Some(["%esi", "%si", "%sil"]),
|
||||
"%rdi" => Some(["%edi", "%di", "%dil"]),
|
||||
"%rbp" => Some(["%ebp", "%bp", "%bpl"]),
|
||||
"%rsp" => Some(["%esp", "%sp", "%spl"]),
|
||||
"%r8" => Some(["%r8d", "%r8w", "%r8b"]),
|
||||
"%r9" => Some(["%r9d", "%r9w", "%r9b"]),
|
||||
"%r10" => Some(["%r10d", "%r10w", "%r10b"]),
|
||||
"%r11" => Some(["%r11d", "%r11w", "%r11b"]),
|
||||
"%r12" => Some(["%r12d", "%r12w", "%r12b"]),
|
||||
"%r13" => Some(["%r13d", "%r13w", "%r13b"]),
|
||||
"%r14" => Some(["%r14d", "%r14w", "%r14b"]),
|
||||
"%r15" => Some(["%r15d", "%r15w", "%r15b"]),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(smaller_names) = remapper {
|
||||
match size {
|
||||
4 => s = smaller_names[0].into(),
|
||||
2 => s = smaller_names[1].into(),
|
||||
1 => s = smaller_names[2].into(),
|
||||
_ => panic!("show_ireg_sized: real"),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
|
||||
let suffix = match size {
|
||||
4 => "l",
|
||||
2 => "w",
|
||||
1 => "b",
|
||||
_ => panic!("show_ireg_sized: virtual"),
|
||||
};
|
||||
s = s + suffix;
|
||||
}
|
||||
|
||||
s
|
||||
}
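// Illustrative test sketch only (not part of this patch); assumes the SystemV
// universe defined above, in which rax()'s name is "%rax".
#[test]
fn show_ireg_sized_examples() {
    let flags = settings::Flags::new(settings::builder());
    let rru = create_reg_universe_systemv(&flags);
    assert_eq!(show_ireg_sized(rax(), Some(&rru), 8), "%rax");
    assert_eq!(show_ireg_sized(rax(), Some(&rru), 4), "%eax");
    assert_eq!(show_ireg_sized(rax(), Some(&rru), 1), "%al");
}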
|
|
@@ -0,0 +1,343 @@
|
|||
//! Lowering rules for X64.
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use regalloc::{Reg, Writable};
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::types;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, Type};
|
||||
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::result::CodegenResult;
|
||||
|
||||
use crate::isa::x64::inst::args::*;
|
||||
use crate::isa::x64::inst::*;
|
||||
use crate::isa::x64::X64Backend;
|
||||
|
||||
/// Context passed to all lowering functions.
|
||||
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
|
||||
|
||||
//=============================================================================
|
||||
// Helpers for instruction lowering.
|
||||
|
||||
fn is_int_ty(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::I8 | types::I16 | types::I32 | types::I64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn int_ty_to_is64(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::I8 | types::I16 | types::I32 => false,
|
||||
types::I64 => true,
|
||||
_ => panic!("type {} is none of I8, I16, I32 or I64", ty),
|
||||
}
|
||||
}
|
||||
|
||||
fn int_ty_to_sizeB(ty: Type) -> u8 {
|
||||
match ty {
|
||||
types::I8 => 1,
|
||||
types::I16 => 2,
|
||||
types::I32 => 4,
|
||||
types::I64 => 8,
|
||||
_ => panic!("ity_to_sizeB"),
|
||||
}
|
||||
}
|
||||
|
||||
fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
|
||||
let inst_data = ctx.data(iri);
|
||||
if inst_data.opcode() == Opcode::Null {
|
||||
Some(0)
|
||||
} else {
|
||||
match inst_data {
|
||||
&InstructionData::UnaryImm { opcode: _, imm } => {
|
||||
// Only has Into for i64; we use u64 elsewhere, so we cast.
|
||||
let imm: i64 = imm.into();
|
||||
Some(imm as u64)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn inst_condcode(data: &InstructionData) -> IntCC {
|
||||
match data {
|
||||
&InstructionData::IntCond { cond, .. }
|
||||
| &InstructionData::BranchIcmp { cond, .. }
|
||||
| &InstructionData::IntCompare { cond, .. }
|
||||
| &InstructionData::IntCondTrap { cond, .. }
|
||||
| &InstructionData::BranchInt { cond, .. }
|
||||
| &InstructionData::IntSelect { cond, .. }
|
||||
| &InstructionData::IntCompareImm { cond, .. } => cond,
|
||||
_ => panic!("inst_condcode(x64): unhandled: {:?}", data),
|
||||
}
|
||||
}
|
||||
|
||||
fn intCC_to_x64_CC(cc: IntCC) -> CC {
|
||||
match cc {
|
||||
IntCC::Equal => CC::Z,
|
||||
IntCC::NotEqual => CC::NZ,
|
||||
IntCC::SignedGreaterThanOrEqual => CC::NL,
|
||||
IntCC::SignedGreaterThan => CC::NLE,
|
||||
IntCC::SignedLessThanOrEqual => CC::LE,
|
||||
IntCC::SignedLessThan => CC::L,
|
||||
IntCC::UnsignedGreaterThanOrEqual => CC::NB,
|
||||
IntCC::UnsignedGreaterThan => CC::NBE,
|
||||
IntCC::UnsignedLessThanOrEqual => CC::BE,
|
||||
IntCC::UnsignedLessThan => CC::B,
|
||||
IntCC::Overflow => CC::O,
|
||||
IntCC::NotOverflow => CC::NO,
|
||||
}
|
||||
}
|
||||
|
||||
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
|
||||
let inputs = ctx.get_input(iri, input);
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
}
|
||||
|
||||
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
|
||||
ctx.get_output(iri, output)
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Top-level instruction lowering entry point, for one instruction.
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
|
||||
let op = ctx.data(iri).opcode();
|
||||
let ty = if ctx.num_outputs(iri) == 1 {
|
||||
Some(ctx.output_ty(iri, 0))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// This is all outstandingly feeble. TODO: much better!
|
||||
|
||||
match op {
|
||||
Opcode::Iconst => {
|
||||
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
|
||||
// Get exactly the bit pattern in 'w64' into the dest. No
|
||||
// monkeying with sign extension etc.
|
||||
let dstIs64 = w64 > 0xFFFF_FFFF;
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Iadd | Opcode::Isub => {
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
let regL = input_to_reg(ctx, iri, 0);
|
||||
let regR = input_to_reg(ctx, iri, 1);
|
||||
let is64 = int_ty_to_is64(ty.unwrap());
|
||||
let how = if op == Opcode::Iadd {
|
||||
RMI_R_Op::Add
|
||||
} else {
|
||||
RMI_R_Op::Sub
|
||||
};
|
||||
ctx.emit(Inst::mov_r_r(true, regL, regD));
|
||||
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
// TODO: implement imm shift value into insn
|
||||
let tySL = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
|
||||
let regSL = input_to_reg(ctx, iri, 0);
|
||||
let regSR = input_to_reg(ctx, iri, 1);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
|
||||
let how = match op {
|
||||
Opcode::Ishl => ShiftKind::Left,
|
||||
Opcode::Ushr => ShiftKind::RightZ,
|
||||
Opcode::Sshr => ShiftKind::RightS,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is64 = tyD == types::I64;
|
||||
let r_rcx = regs::rcx();
|
||||
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
|
||||
ctx.emit(Inst::mov_r_r(true, regSL, regD));
|
||||
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
|
||||
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
|
||||
} else {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Uextend | Opcode::Sextend => {
|
||||
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
|
||||
// don't accept a register source operand. They should be changed
|
||||
// so as to have _RM_R form.
|
||||
// TODO2: if the source operand is a load, incorporate that.
|
||||
let isZX = op == Opcode::Uextend;
|
||||
let tyS = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0);
|
||||
let regS = input_to_reg(ctx, iri, 0);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::mov_r_r(true, regS, regD));
|
||||
match (tyS, tyD, isZX) {
|
||||
(types::I8, types::I64, false) => {
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FallthroughReturn | Opcode::Return => {
|
||||
for i in 0..ctx.num_inputs(iri) {
|
||||
let src_reg = input_to_reg(ctx, iri, i);
|
||||
let retval_reg = ctx.retval(i);
|
||||
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
|
||||
}
|
||||
// N.B.: the Ret itself is generated by the ABI.
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
| Opcode::SdivImm
|
||||
| Opcode::UremImm
|
||||
| Opcode::SremImm
|
||||
| Opcode::IrsubImm
|
||||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
| Opcode::IsubIfbin
|
||||
| Opcode::IsubBout
|
||||
| Opcode::IsubIfbout
|
||||
| Opcode::IsubBorrow
|
||||
| Opcode::IsubIfborrow
|
||||
| Opcode::BandImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BxorImm
|
||||
| Opcode::RotlImm
|
||||
| Opcode::RotrImm
|
||||
| Opcode::IshlImm
|
||||
| Opcode::UshrImm
|
||||
| Opcode::SshrImm => {
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||
}
|
||||
}
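// A standalone sketch, not taken from this patch: it only illustrates the shift-pair
// trick used in the (I8 -> I64, sign-extend) arm above, where shifting left by 56 and
// then arithmetic-right by 56 reproduces sign extension of the low byte.
fn sext_i8_to_i64_via_shifts(x: u64) -> i64 {
    // Move the low byte to the top, then let the arithmetic shift replicate its sign bit.
    ((x << 56) as i64) >> 56
}

#[test]
fn shift_pair_matches_native_sign_extension() {
    for b in 0u64..=0xFF {
        assert_eq!(sext_i8_to_i64_via_shifts(b), (b as u8) as i8 as i64);
    }
}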
|
||||
|
||||
//=============================================================================
|
||||
// Lowering-backend trait implementation.
|
||||
|
||||
impl LowerBackend for X64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_insn_to_regs(ctx, ir_inst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
// A block should end with at most two branches. The first may be a
|
||||
// conditional branch; a conditional branch can be followed only by an
|
||||
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
// it may be an unconditional branch, a fallthrough, a return, or a
|
||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||
// verifier pass.
|
||||
assert!(branches.len() <= 2);
|
||||
|
||||
let mut unimplemented = false;
|
||||
|
||||
if branches.len() == 2 {
|
||||
// Must be a conditional branch followed by an unconditional branch.
|
||||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
println!(
|
||||
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
op0, op1
|
||||
);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rS = input_to_reg(ctx, branches[0], 0);
|
||||
let cc = match op0 {
|
||||
Opcode::Brz => CC::Z,
|
||||
Opcode::Brnz => CC::NZ,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
Opcode::BrIcmp => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rSL = input_to_reg(ctx, branches[0], 0);
|
||||
let rSR = input_to_reg(ctx, branches[0], 1);
|
||||
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
// FIXME verify rSR vs rSL ordering
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
// TODO: Brif/icmp, Brff/icmp, jump tables
|
||||
_ => {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert!(branches.len() == 1);
|
||||
|
||||
// Must be an unconditional branch or trap.
|
||||
let op = ctx.data(branches[0]).opcode();
|
||||
match op {
|
||||
Opcode::Jump => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Fallthrough => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Trap => {
|
||||
unimplemented = true;
|
||||
}
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
if unimplemented {
|
||||
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
//! x86-64 Instruction Set Architecture.
|
||||
|
||||
use alloc::boxed::Box;
|
||||
|
||||
use regalloc::RealRegUniverse;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{self, Flags};
|
||||
|
||||
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
|
||||
|
||||
mod abi;
|
||||
mod inst;
|
||||
mod lower;
|
||||
|
||||
/// An X64 backend.
|
||||
pub(crate) struct X64Backend {
|
||||
triple: Triple,
|
||||
flags: Flags,
|
||||
reg_universe: RealRegUniverse,
|
||||
}
|
||||
|
||||
impl X64Backend {
|
||||
/// Create a new X64 backend with the given (shared) flags.
|
||||
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
|
||||
let reg_universe = create_reg_universe_systemv(&flags);
|
||||
Self {
|
||||
triple,
|
||||
flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
|
||||
compile::compile::<Self>(&func, self, abi)
|
||||
}
|
||||
}
|
||||
|
||||
impl MachBackend for X64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let buffer = buffer.finish();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
}
|
||||
|
||||
fn flags(&self) -> &Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
"x64"
|
||||
}
|
||||
|
||||
fn triple(&self) -> Triple {
|
||||
self.triple.clone()
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// underflow of a subtract (carry is borrow for subtract).
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
}
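// A plain-Rust sketch, not taken from this patch, of why "unsigned >=" stands in for the
// carry flag above: an unsigned add carries exactly when the wrapped sum is smaller than
// one of its operands.
fn add_sets_carry(a: u64, b: u64) -> bool {
    a.checked_add(b).is_none()
}

fn carry_via_unsigned_compare(a: u64, b: u64) -> bool {
    a.wrapping_add(b) < a
}

#[test]
fn carry_detection_agrees() {
    for &(a, b) in &[(0u64, 0u64), (u64::MAX, 1), (u64::MAX, 0), (1u64 << 63, 1u64 << 63)] {
        assert_eq!(add_sets_carry(a, b), carry_via_unsigned_compare(a, b));
    }
}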
|
||||
|
||||
/// Create a new `isa::Builder`.
|
||||
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
|
||||
let backend = X64Backend::new_with_flags(triple, flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
},
|
||||
}
|
||||
}
|
|
@ -6,7 +6,6 @@ use super::settings as isa_settings;
|
|||
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use crate::cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use crate::ir;
|
||||
use crate::ir::entities::StackSlot;
|
||||
use crate::ir::immediates::Imm64;
|
||||
use crate::ir::stackslot::{StackOffset, StackSize};
|
||||
use crate::ir::types;
|
||||
|
@ -19,7 +18,6 @@ use crate::regalloc::RegisterSet;
|
|||
use crate::result::CodegenResult;
|
||||
use crate::stack_layout::layout_stack;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::vec::Vec;
|
||||
use core::i32;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -44,7 +42,7 @@ static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
|||
///
|
||||
/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
|
||||
/// convention reserves spill space for parameters, you don’t have to use them as such"
|
||||
const WIN_SHADOW_STACK_SPACE: i32 = 32;
|
||||
const WIN_SHADOW_STACK_SPACE: StackSize = 32;
|
||||
|
||||
/// Stack alignment requirement for functions.
|
||||
///
|
||||
|
@ -72,6 +70,7 @@ struct Args {
|
|||
shared_flags: shared_settings::Flags,
|
||||
#[allow(dead_code)]
|
||||
isa_flags: isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
}
|
||||
|
||||
impl Args {
|
||||
|
@ -82,12 +81,13 @@ impl Args {
|
|||
call_conv: CallConv,
|
||||
shared_flags: &shared_settings::Flags,
|
||||
isa_flags: &isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
) -> Self {
|
||||
let offset = if call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
} else {
|
||||
0
|
||||
} as u32;
|
||||
};
|
||||
|
||||
Self {
|
||||
pointer_bytes: bits / 8,
|
||||
|
@ -101,6 +101,7 @@ impl Args {
|
|||
call_conv,
|
||||
shared_flags: shared_flags.clone(),
|
||||
isa_flags: isa_flags.clone(),
|
||||
assigning_returns,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -109,6 +110,17 @@ impl ArgAssigner for Args {
|
|||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
let ty = arg.value_type;
|
||||
|
||||
if ty.bits() > u16::from(self.pointer_bits) {
|
||||
if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
|
||||
// "Any argument that doesn't fit in 8 bytes, or isn't
|
||||
// 1, 2, 4, or 8 bytes, must be passed by reference"
|
||||
return ValueConversion::Pointer(self.pointer_type).into();
|
||||
} else if !ty.is_vector() && !ty.is_float() {
|
||||
// On SystemV large integers and booleans are broken down to fit in a register.
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
}
|
||||
|
||||
// Vectors should stay in vector registers unless SIMD is not enabled--then they are split
|
||||
if ty.is_vector() {
|
||||
if self.shared_flags.enable_simd() {
|
||||
|
@ -119,11 +131,6 @@ impl ArgAssigner for Args {
|
|||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
|
||||
match arg.extension {
|
||||
|
@ -205,7 +212,7 @@ pub fn legalize_signature(
|
|||
PointerWidth::U16 => panic!(),
|
||||
PointerWidth::U32 => {
|
||||
bits = 32;
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags);
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
|
||||
}
|
||||
PointerWidth::U64 => {
|
||||
bits = 64;
|
||||
|
@ -217,6 +224,7 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
} else {
|
||||
Args::new(
|
||||
|
@ -226,6 +234,7 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
@ -245,26 +254,20 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
true,
|
||||
);
|
||||
|
||||
let sig_is_multi_return = sig.is_multi_return();
|
||||
|
||||
// If this is a multi-value return and we don't have enough available return
|
||||
// registers to fit all of the return values, we need to backtrack and start
|
||||
// If we don't have enough available return registers
|
||||
// to fit all of the return values, we need to backtrack and start
|
||||
// assigning locations all over again with a different strategy. In order to
|
||||
// do that, we need a copy of the original assigner for the returns.
|
||||
let backup_rets_for_struct_return = if sig_is_multi_return {
|
||||
Some(rets.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let mut backup_rets = rets.clone();
|
||||
|
||||
if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
|
||||
if sig.is_multi_return()
|
||||
&& new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
if new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
{
|
||||
// The return values couldn't all fit into available return
|
||||
// registers. Introduce the use of a struct-return parameter.
|
||||
|
@ -276,6 +279,7 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match args.assign(&ret_ptr_param) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -285,8 +289,6 @@ pub fn legalize_signature(
|
|||
_ => unreachable!("return pointer should always get a register assignment"),
|
||||
}
|
||||
|
||||
let mut backup_rets = backup_rets_for_struct_return.unwrap();
|
||||
|
||||
// We're using the first return register for the return pointer (like
|
||||
// sys v does).
|
||||
let mut ret_ptr_return = AbiParam {
|
||||
|
@ -294,6 +296,7 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match backup_rets.assign(&ret_ptr_return) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -501,7 +504,7 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
|
|||
|
||||
let word_size = StackSize::from(isa.pointer_bytes());
|
||||
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE as u32
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
@ -525,50 +528,60 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
|
||||
}
|
||||
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers + shadow space
|
||||
// return address + frame pointer + all callee-saved registers
|
||||
//
|
||||
// Pushing the return address is an implicit function of the `call`
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let word_size = isa.pointer_bytes() as usize;
|
||||
let num_fprs = csrs.iter(FPR).len();
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
|
||||
let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
|
||||
let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
|
||||
|
||||
// Only create an FPR stack slot if we're going to save FPRs.
|
||||
let fpr_slot = if num_fprs > 0 {
|
||||
// Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it
|
||||
// seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be
|
||||
// through `stack_load` and `stack_store` (see later comment about issue #1198). Even
|
||||
// though in a certain light FPR preserve/restore is "spilling" an argument, regalloc
|
||||
// implies that `SpillSlot` may be eligible for certain optimizations, and we know with
|
||||
// certainty that this space may not be reused in the function, nor moved around.
|
||||
Some(func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::ExplicitSlot,
|
||||
size: (num_fprs * types::F64X2.bytes() as usize) as u32,
|
||||
offset: None,
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
|
||||
if fpsr_stack_size > 0 {
|
||||
csr_stack_size = (csr_stack_size + 15) & !15;
|
||||
}
|
||||
|
||||
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
|
||||
// since cranelift does not support spill slots before incoming args
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)),
|
||||
size: csr_stack_size,
|
||||
offset: Some(-(csr_stack_size as StackOffset)),
|
||||
});
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
|
||||
// If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
|
||||
if !is_leaf {
|
||||
// TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::ExplicitSlot,
|
||||
size: WIN_SHADOW_STACK_SPACE,
|
||||
offset: None,
|
||||
});
|
||||
}
|
||||
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||
|
||||
// Subtract the GPR saved register size from the local size because pushes are used for the saves
|
||||
let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = isa.pointer_type();
|
||||
let sp_arg_index = if fpsr_stack_size > 0 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -601,19 +614,13 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
fpr_slot.as_ref(),
|
||||
sp_arg_index.is_some(),
|
||||
isa,
|
||||
);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
fpr_slot.as_ref(),
|
||||
);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
|
||||
Ok(())
|
||||
}
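// A detached sketch, not taken from this patch, of the `(size + 15) & !15` rounding used
// above so that FPR saves following the GPR pushes stay 16-byte aligned.
fn align_up_to_16(size: u32) -> u32 {
    (size + 15) & !15
}

#[test]
fn rounds_up_to_next_multiple_of_16() {
    assert_eq!(align_up_to_16(0), 0);
    assert_eq!(align_up_to_16(1), 16);
    assert_eq!(align_up_to_16(16), 16);
    assert_eq!(align_up_to_16(24), 32); // e.g. three 8-byte GPR pushes pad out to 32
}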
|
||||
|
@ -649,6 +656,20 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
|
||||
// On X86-32 all parameters, including vmctx, are passed on stack, and we need
|
||||
// to extract vmctx from the stack before we can save the frame pointer.
|
||||
let sp_arg_index = if isa.pointer_bits() == 32 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -666,11 +687,18 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
// Set up the cursor and insert the prologue
|
||||
let entry_block = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
|
||||
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
|
||||
insert_common_prologue(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
sp_arg_index.is_some(),
|
||||
isa,
|
||||
);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, None);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -682,9 +710,18 @@ fn insert_common_prologue(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
has_sp_param: bool,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let sp = if has_sp_param {
|
||||
let block = pos.current_block().expect("missing block under cursor");
|
||||
let sp = pos.func.dfg.append_block_param(block, reg_type);
|
||||
pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
|
||||
Some(sp)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// If this is a leaf function with zero stack, then there's no need to
|
||||
// insert a stack check since it can't overflow anything and
|
||||
// forward-progress is guaranteed so long as loops are handled anyway.
|
||||
|
@ -707,7 +744,7 @@ fn insert_common_prologue(
|
|||
None => pos
|
||||
.func
|
||||
.stack_limit
|
||||
.map(|gv| interpret_gv(pos, gv, scratch)),
|
||||
.map(|gv| interpret_gv(pos, gv, sp, scratch)),
|
||||
};
|
||||
if let Some(stack_limit_arg) = stack_limit_arg {
|
||||
insert_stack_check(pos, stack_size, stack_limit_arg);
|
||||
|
@ -780,38 +817,27 @@ fn insert_common_prologue(
|
|||
}
|
||||
}
|
||||
|
||||
// Now that RSP is prepared for the function, we can use stack slots:
|
||||
// With the stack pointer adjusted, save any callee-saved floating point registers via offset
|
||||
// FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
|
||||
let mut last_fpr_save = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
// `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Append param to entry block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function entry.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
let mut fpr_offset = 0;
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
|
||||
for reg in csrs.iter(FPR) {
|
||||
// Append param to entry block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
last_fpr_save =
|
||||
Some(
|
||||
pos.ins()
|
||||
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset),
|
||||
);
|
||||
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
}
|
||||
last_fpr_save = Some(pos.ins().store(
|
||||
ir::MemFlags::trusted(),
|
||||
csr_arg,
|
||||
sp.expect("FPR save requires SP param"),
|
||||
(stack_size - offset) as i32,
|
||||
));
|
||||
}
|
||||
|
||||
pos.func.prologue_end = Some(
|
||||
|
@ -834,19 +860,55 @@ fn insert_common_prologue(
|
|||
/// compared to the stack pointer, but currently it serves enough functionality
|
||||
/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
|
||||
/// bit over time!
|
||||
fn interpret_gv(pos: &mut EncCursor, gv: ir::GlobalValue, scratch: ir::ValueLoc) -> ir::Value {
|
||||
fn interpret_gv(
|
||||
pos: &mut EncCursor,
|
||||
gv: ir::GlobalValue,
|
||||
sp: Option<ir::Value>,
|
||||
scratch: ir::ValueLoc,
|
||||
) -> ir::Value {
|
||||
match pos.func.global_values[gv] {
|
||||
ir::GlobalValueData::VMContext => pos
|
||||
.func
|
||||
.special_param(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found"),
|
||||
ir::GlobalValueData::VMContext => {
|
||||
let vmctx_index = pos
|
||||
.func
|
||||
.signature
|
||||
.special_param_index(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found");
|
||||
match pos.func.signature.params[vmctx_index] {
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Reg(_),
|
||||
..
|
||||
} => {
|
||||
let entry = pos.func.layout.entry_block().unwrap();
|
||||
pos.func.dfg.block_params(entry)[vmctx_index]
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Stack(offset),
|
||||
value_type,
|
||||
..
|
||||
} => {
|
||||
let offset =
|
||||
offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
|
||||
// The following access can be marked `trusted` because it is a load of an argument. We
|
||||
// know it is safe because it was safe to write it in preparing this function call.
|
||||
let ret =
|
||||
pos.ins()
|
||||
.load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
|
||||
pos.func.locations[ret] = scratch;
|
||||
return ret;
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Unassigned,
|
||||
..
|
||||
} => unreachable!(),
|
||||
}
|
||||
}
|
||||
ir::GlobalValueData::Load {
|
||||
base,
|
||||
offset,
|
||||
global_type,
|
||||
readonly: _,
|
||||
} => {
|
||||
let base = interpret_gv(pos, base, scratch);
|
||||
let base = interpret_gv(pos, base, sp, scratch);
|
||||
let ret = pos
|
||||
.ins()
|
||||
.load(global_type, ir::MemFlags::trusted(), base, offset);
|
||||
|
@ -911,13 +973,13 @@ fn insert_common_epilogues(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
sp_arg_index: Option<usize>,
|
||||
) {
|
||||
while let Some(block) = pos.next_block() {
|
||||
pos.goto_last_inst(block);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, fpr_slot);
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, sp_arg_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -931,56 +993,8 @@ fn insert_common_epilogue(
|
|||
pos: &mut EncCursor,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
sp_arg_index: Option<usize>,
|
||||
) {
|
||||
// Even though instructions to restore FPRs are inserted first, we have to append them after
|
||||
// restored GPRs to satisfy parameter order in the return.
|
||||
let mut restored_fpr_values = Vec::new();
|
||||
|
||||
// Restore FPRs before we move RSP and invalidate stack slots.
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
// `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
//
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function exit.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
|
||||
let mut fpr_offset = 0;
|
||||
|
||||
for reg in csrs.iter(FPR) {
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
stack_addr,
|
||||
fpr_offset,
|
||||
);
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
|
||||
// Unlike GPRs before, we don't need to step back after reach restoration because FPR
|
||||
// restoration is order-insensitive. Furthermore: we want GPR restoration to begin
|
||||
// after FPR restoration, so that stack adjustments occur after we're done relying on
|
||||
// StackSlot validity.
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
restored_fpr_values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
sp_adjust_inst = Some(pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)));
|
||||
}
|
||||
|
||||
// Insert the pop of the frame pointer
|
||||
let fp_pop = pos.ins().x86_pop(reg_type);
|
||||
let fp_pop_inst = pos.prev_inst().unwrap();
|
||||
|
@ -991,13 +1005,47 @@ fn insert_common_epilogue(
|
|||
let mut first_csr_pop_inst = None;
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_pop = pos.ins().x86_pop(reg_type);
|
||||
first_csr_pop_inst = Some(pos.prev_inst().unwrap());
|
||||
first_csr_pop_inst = pos.prev_inst();
|
||||
assert!(first_csr_pop_inst.is_some());
|
||||
pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_pop);
|
||||
}
|
||||
|
||||
for value in restored_fpr_values.into_iter() {
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
// Insert the adjustment of SP
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
|
||||
sp_adjust_inst = pos.prev_inst();
|
||||
assert!(sp_adjust_inst.is_some());
|
||||
}
|
||||
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(index) = sp_arg_index {
|
||||
let sp = pos
|
||||
.func
|
||||
.dfg
|
||||
.block_params(pos.func.layout.entry_block().unwrap())[index];
|
||||
|
||||
// Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
sp,
|
||||
(stack_size - offset) as i32,
|
||||
);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
}
|
||||
} else {
|
||||
assert!(csrs.iter(FPR).len() == 0);
|
||||
}
|
||||
|
||||
pos.func.epilogues_start.push(
|
||||
|
|
|
@ -13,6 +13,7 @@ use crate::isa::encoding::base_size;
|
|||
use crate::isa::encoding::{Encoding, RecipeSizing};
|
||||
use crate::isa::RegUnit;
|
||||
use crate::isa::{self, TargetIsa};
|
||||
use crate::legalizer::expand_as_libcall;
|
||||
use crate::predicates;
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
|
@ -246,6 +247,20 @@ fn size_with_inferred_rex_for_inreg0_inreg1(
|
|||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on the second and third operands.
|
||||
fn size_with_inferred_rex_for_inreg1_inreg2(
|
||||
sizing: &RecipeSizing,
|
||||
_enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
// No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
|
||||
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|
||||
|| test_input(2, inst, divert, func, is_extended_reg);
|
||||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
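// A hedged sketch, not taken from this patch, of the sizing rule these helpers encode:
// any operand living in an "extended" register (hardware encodings 8..15, i.e. r8..r15
// or xmm8..xmm15) forces a one-byte REX prefix. The register numbering here is assumed
// purely for illustration.
fn any_extended_reg(hw_encodings: &[u8]) -> bool {
    hw_encodings.iter().any(|&enc| enc >= 8)
}

fn size_with_inferred_rex(base_size: u8, hw_encodings: &[u8]) -> u8 {
    base_size + if any_extended_reg(hw_encodings) { 1 } else { 0 }
}

#[test]
fn rex_adds_one_byte() {
    assert_eq!(size_with_inferred_rex(3, &[0, 5]), 3); // e.g. rax, rbp: no REX needed
    assert_eq!(size_with_inferred_rex(3, &[1, 9]), 4); // e.g. rcx, r9: REX required
}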
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on a single
|
||||
/// input register and a single output register.
|
||||
fn size_with_inferred_rex_for_inreg0_outreg0(
|
||||
|
@ -1181,10 +1196,10 @@ fn convert_extractlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::ExtractLane {
|
||||
if let ir::InstructionData::BinaryImm8 {
|
||||
opcode: ir::Opcode::Extractlane,
|
||||
arg,
|
||||
lane,
|
||||
imm: lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
// NOTE: the following legalization assumes that the upper bits of the XMM register do
|
||||
|
@ -1237,10 +1252,10 @@ fn convert_insertlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::InsertLane {
|
||||
if let ir::InstructionData::TernaryImm8 {
|
||||
opcode: ir::Opcode::Insertlane,
|
||||
args: [vector, replacement],
|
||||
lane,
|
||||
imm: lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let value_type = pos.func.dfg.value_type(vector);
|
||||
|
@ -1255,7 +1270,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_insertps(vector, immediate, replacement)
|
||||
.x86_insertps(vector, replacement, immediate)
|
||||
}
|
||||
F64X2 => {
|
||||
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
|
||||
|
@ -1283,7 +1298,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_pinsr(vector, lane, replacement);
|
||||
.x86_pinsr(vector, replacement, lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1318,6 +1333,39 @@ fn convert_ineg(
|
|||
}
|
||||
}
|
||||
|
||||
fn expand_dword_to_xmm<'f>(
|
||||
pos: &mut FuncCursor<'_>,
|
||||
arg: ir::Value,
|
||||
arg_type: ir::Type,
|
||||
) -> ir::Value {
|
||||
if arg_type == I64 {
|
||||
let (arg_lo, arg_hi) = pos.ins().isplit(arg);
|
||||
let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
|
||||
let arg = pos.ins().insertlane(arg, arg_hi, 1);
|
||||
let arg = pos.ins().raw_bitcast(I64X2, arg);
|
||||
arg
|
||||
} else {
|
||||
pos.ins().bitcast(I64X2, arg)
|
||||
}
|
||||
}
|
||||
|
||||
fn contract_dword_from_xmm<'f>(
|
||||
pos: &mut FuncCursor<'f>,
|
||||
inst: ir::Inst,
|
||||
ret: ir::Value,
|
||||
ret_type: ir::Type,
|
||||
) {
|
||||
if ret_type == I64 {
|
||||
let ret = pos.ins().raw_bitcast(I32X4, ret);
|
||||
let ret_lo = pos.ins().extractlane(ret, 0);
|
||||
let ret_hi = pos.ins().extractlane(ret, 1);
|
||||
pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
|
||||
} else {
|
||||
let ret = pos.ins().extractlane(ret, 0);
|
||||
pos.func.dfg.replace(inst).ireduce(ret_type, ret);
|
||||
}
|
||||
}
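// A scalar sketch, not taken from this patch, of the round trip the two helpers above
// perform for I64 values: split into 32-bit halves on the way into the XMM register and
// re-concatenate on the way back out.
fn isplit_model(x: u64) -> (u32, u32) {
    (x as u32, (x >> 32) as u32)
}

fn iconcat_model(lo: u32, hi: u32) -> u64 {
    (lo as u64) | ((hi as u64) << 32)
}

#[test]
fn split_then_concat_is_identity() {
    let x = 0x0123_4567_89AB_CDEFu64;
    let (lo, hi) = isplit_model(x);
    assert_eq!(iconcat_model(lo, hi), x);
}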
|
||||
|
||||
// Masks for i8x16 unsigned right shift.
|
||||
static USHR_MASKS: [u8; 128] = [
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||||
|
@ -1379,7 +1427,24 @@ fn convert_ushr(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinsrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psrl(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
@ -1446,12 +1511,76 @@ fn convert_ishl(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinsrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psll(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
|
||||
fn convert_i64x2_imul(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Imul,
|
||||
args: [arg0, arg1],
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty == I64X2 {
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
|
||||
// If we have certain AVX512 features, we can lower this instruction simply.
|
||||
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
|
||||
} else {
|
||||
// Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
|
||||
// 64-bit lane into 32-bit high and low sections using shifting and then performs
|
||||
// the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
|
||||
// concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1).
|
||||
let high0 = pos.ins().ushr_imm(arg0, 32);
|
||||
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
|
||||
let high1 = pos.ins().ushr_imm(arg1, 32);
|
||||
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
|
||||
let addhigh = pos.ins().iadd(mul0, mul1);
|
||||
let high = pos.ins().ishl_imm(addhigh, 32);
|
||||
let low = pos.ins().x86_pmuludq(arg0, arg1);
|
||||
pos.func.dfg.replace(inst).iadd(low, high);
|
||||
}
|
||||
} else {
|
||||
unreachable!(
|
||||
"{} should be encodable; it cannot be legalized by convert_i64x2_imul",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
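// A scalar sanity sketch, not taken from this patch, of the per-lane arithmetic the SSE2
// fallback above relies on: for a = (hi0 << 32) | lo0 and b = (hi1 << 32) | lo1, the low
// 64 bits of a * b equal ((hi0*lo1 + hi1*lo0) << 32) + lo0*lo1, which is what the
// pmuludq/shift/add sequence computes.
fn sse2_style_mul_low64(a: u64, b: u64) -> u64 {
    let (lo0, hi0) = (a & 0xFFFF_FFFF, a >> 32);
    let (lo1, hi1) = (b & 0xFFFF_FFFF, b >> 32);
    let cross = hi0.wrapping_mul(lo1).wrapping_add(hi1.wrapping_mul(lo0));
    (cross << 32).wrapping_add(lo0.wrapping_mul(lo1))
}

#[test]
fn matches_wrapping_multiply() {
    for &(a, b) in &[(3u64, 7u64), (u64::MAX, 2), (0xDEAD_BEEF_1234_5678, 0x9ABC_DEF0_0FED_CBA9)] {
        assert_eq!(sse2_style_mul_low64(a, b), a.wrapping_mul(b));
    }
}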
|
||||
|
||||
fn expand_tls_value(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
|
|
|
@ -23,6 +23,7 @@ use crate::result::CodegenResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -53,12 +54,23 @@ fn isa_constructor(
|
|||
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
|
||||
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
|
||||
};
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
|
||||
let isa_flags = settings::Flags::new(&shared_flags, builder);
|
||||
|
||||
if isa_flags.use_new_backend() {
|
||||
#[cfg(not(feature = "x64"))]
|
||||
panic!("new backend x86 support not included by cargo features!");
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
super::x64::isa_builder(triple).finish(shared_flags)
|
||||
} else {
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags,
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
|
@ -173,6 +185,10 @@ impl TargetIsa for Isa {
|
|||
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
|
||||
Some(unwind::systemv::create_cie())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -28,22 +28,7 @@ pub(crate) fn create_unwind_info(
|
|||
let mut prologue_size = 0;
|
||||
let mut unwind_codes = Vec::new();
|
||||
let mut found_end = false;
|
||||
|
||||
// Have we saved at least one FPR? if so, we might have to check additional constraints.
|
||||
let mut saved_fpr = false;
|
||||
|
||||
// In addition to the min offset for a callee-save, we need to know the offset from the
|
||||
// frame base to the stack pointer, so that we can record an unwind offset that spans only
|
||||
// to the end of callee-save space.
|
||||
let mut static_frame_allocation_size = 0u32;
|
||||
|
||||
// For the time being, FPR preservation is split into a stack_addr and later store/load.
|
||||
// Store the register used for stack store and ensure it is the same register with no
|
||||
// intervening changes to the frame size.
|
||||
let mut callee_save_region_reg = None;
|
||||
// Also record the callee-save region's offset from RSP, because it must be added to FPR
|
||||
// save offsets to compute an offset from the frame base.
|
||||
let mut callee_save_offset = None;
|
||||
let mut xmm_save_count: u8 = 0;
|
||||
|
||||
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
|
||||
// x64 ABI prologues cannot exceed 255 bytes in length
|
||||
|
@ -60,8 +45,6 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::Unary { opcode, arg } => {
|
||||
match opcode {
|
||||
Opcode::X86Push => {
|
||||
static_frame_allocation_size += 8;
|
||||
|
||||
unwind_codes.push(UnwindCode::PushRegister {
|
||||
offset: unwind_offset,
|
||||
reg: GPR.index_of(func.locations[arg].unwrap_reg()) as u8,
|
||||
|
@ -70,7 +53,6 @@ pub(crate) fn create_unwind_info(
|
|||
Opcode::AdjustSpDown => {
|
||||
let stack_size =
|
||||
stack_size.expect("expected a previous stack size instruction");
|
||||
static_frame_allocation_size += stack_size;
|
||||
|
||||
// This is used when calling a stack check function
|
||||
// We need to track the assignment to RAX which has the size of the stack
|
||||
|
@ -85,10 +67,6 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::CopySpecial { src, dst, .. } => {
|
||||
if let Some(frame_register) = frame_register {
|
||||
if src == (RU::rsp as RegUnit) && dst == frame_register {
|
||||
// Constructing an rbp-based stack frame, so the static frame
|
||||
// allocation restarts at 0 from here.
|
||||
static_frame_allocation_size = 0;
|
||||
|
||||
unwind_codes.push(UnwindCode::SetFramePointer {
|
||||
offset: unwind_offset,
|
||||
sp_offset: 0,
|
||||
|
@ -113,7 +91,7 @@ pub(crate) fn create_unwind_info(
|
|||
let imm: i64 = imm.into();
|
||||
assert!(imm <= core::u32::MAX as i64);
|
||||
|
||||
static_frame_allocation_size += imm as u32;
|
||||
stack_size = Some(imm as u32);
|
||||
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
offset: unwind_offset,
|
||||
|
@ -123,52 +101,27 @@ pub(crate) fn create_unwind_info(
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
InstructionData::StackLoad {
|
||||
opcode: Opcode::StackAddr,
|
||||
stack_slot,
|
||||
offset: _,
|
||||
} => {
|
||||
let result = func.dfg.inst_results(inst).get(0).unwrap();
|
||||
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
|
||||
callee_save_region_reg = Some(frame_reg);
|
||||
|
||||
// Figure out the offset in the call frame that `frame_reg` will have.
|
||||
let frame_size = func
|
||||
.stack_slots
|
||||
.layout_info
|
||||
.expect("func's stack slots have layout info if stack operations exist")
|
||||
.frame_size;
|
||||
// Because we're well after the prologue has been constructed, stack slots
|
||||
// must have been laid out...
|
||||
let slot_offset = func.stack_slots[stack_slot]
|
||||
.offset
|
||||
.expect("callee-save slot has an offset computed");
|
||||
let frame_offset = frame_size as i32 + slot_offset;
|
||||
|
||||
callee_save_offset = Some(frame_offset as u32);
|
||||
}
|
||||
}
|
||||
InstructionData::Store {
|
||||
opcode: Opcode::Store,
|
||||
args: [arg1, arg2],
|
||||
flags: _flags,
|
||||
offset,
|
||||
..
|
||||
} => {
|
||||
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
|
||||
if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
|
||||
(func.locations[arg1], func.locations[arg2])
|
||||
{
|
||||
if Some(base_ru) == callee_save_region_reg {
|
||||
let offset_int: i32 = offset.into();
|
||||
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
|
||||
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR presevation requires an FPR save region, which has some stack offset");
|
||||
if FPR.contains(ru) {
|
||||
saved_fpr = true;
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: ru as u8,
|
||||
stack_offset: offset_int,
|
||||
});
|
||||
}
|
||||
// If this is a save of an FPR, record an unwind operation
|
||||
// Note: the stack_offset here is relative to an adjusted SP
|
||||
// This will be fixed up later to be based on the frame pointer offset
|
||||
if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
|
||||
let offset: i32 = offset.into();
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: src as u8,
|
||||
stack_offset: offset as u32,
|
||||
});
|
||||
|
||||
xmm_save_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -183,41 +136,45 @@ pub(crate) fn create_unwind_info(
|
|||
|
||||
assert!(found_end);
|
||||
|
||||
if saved_fpr {
|
||||
if static_frame_allocation_size > 240 && saved_fpr {
|
||||
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
|
||||
This is a Cranelift implementation limit, see \
|
||||
https://github.com/bytecodealliance/wasmtime/issues/1475",
|
||||
static_frame_allocation_size);
|
||||
return Err(CodegenError::ImplLimitExceeded);
|
||||
// When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
|
||||
// register minus some offset, forming a "base address". This attempts to calculate the frame register offset
|
||||
// while updating the XMM save offsets to be relative from this "base address" rather than RSP.
|
||||
let mut frame_register_offset = 0;
|
||||
if frame_register.is_some() && xmm_save_count > 0 {
|
||||
// Determine the number of 16-byte slots used for all CSRs (including GPRs)
|
||||
// The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
|
||||
// Assumption: each FPR is stored at a lower address than the previous one
|
||||
let mut last_stack_offset = None;
|
||||
let mut fpr_save_count: u8 = 0;
|
||||
let mut gpr_push_count: u8 = 0;
|
||||
for code in unwind_codes.iter_mut() {
|
||||
match code {
|
||||
UnwindCode::SaveXmm { stack_offset, .. } => {
|
||||
if let Some(last) = last_stack_offset {
|
||||
assert!(last > *stack_offset);
|
||||
}
|
||||
last_stack_offset = Some(*stack_offset);
|
||||
fpr_save_count += 1;
|
||||
*stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
|
||||
}
|
||||
UnwindCode::PushRegister { .. } => {
|
||||
gpr_push_count += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// Only test static frame size is 16-byte aligned when an FPR is saved to avoid
|
||||
// panicking when alignment is elided because no FPRs are saved and no child calls are
|
||||
// made.
|
||||
assert!(
|
||||
static_frame_allocation_size % 16 == 0,
|
||||
"static frame allocation must be a multiple of 16"
|
||||
);
|
||||
}
|
||||
assert_eq!(fpr_save_count, xmm_save_count);
|
||||
|
||||
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
|
||||
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
|
||||
// Windows x64 SEH only allows this offset be up to 240 bytes, however, meaning large
|
||||
// frames are inexpressible, and we cannot actually compile the function. In case there are
|
||||
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
|
||||
// will actually check it. This, then, avoids panics when compiling functions with large
|
||||
// call frames.
|
||||
let reported_frame_offset = if saved_fpr {
|
||||
(static_frame_allocation_size / 16) as u8
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// Account for alignment space when there's an odd number of GPR pushes
|
||||
// Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
|
||||
frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
|
||||
}
|
||||
|
||||
Ok(Some(UnwindInfo {
|
||||
flags: 0, // this assumes cranelift functions have no SEH handlers
|
||||
prologue_size: prologue_size as u8,
|
||||
frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
|
||||
frame_register_offset: reported_frame_offset,
|
||||
frame_register_offset,
|
||||
unwind_codes,
|
||||
}))
|
||||
}
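// A minimal sketch, not taken from this patch, of the slot arithmetic used for the frame
// register offset above: the offset counts 16-byte slots, each saved FPR takes one slot,
// and each 8-byte GPR push takes half a slot, hence the rounded-up division by two.
fn frame_register_offset_slots(fpr_save_count: u8, gpr_push_count: u8) -> u8 {
    fpr_save_count + (gpr_push_count + 1) / 2
}

#[test]
fn gpr_pushes_round_up_to_whole_slots() {
    assert_eq!(frame_register_offset_slots(2, 3), 4); // 3 pushes (24 bytes) still occupy 2 slots
    assert_eq!(frame_register_offset_slots(0, 4), 2);
}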
|
||||
|
@ -284,7 +241,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 9,
|
||||
size: 64 + 32
|
||||
size: 64
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -303,7 +260,7 @@ mod tests {
|
|||
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x09, // Prolog offset
|
||||
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
|
||||
0x72, // Operation 2 (small stack alloc), size = 0x7 slots (e.g. (0x7 * 8) + 8 = 64 bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
@ -349,7 +306,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 10000 + 32
|
||||
size: 10000
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -369,8 +326,8 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
|
||||
0xE6, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E6 * 8 = 100032 (10000 + 32) bytes)
|
||||
0xE2, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
@ -414,7 +371,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 1000000 + 32
|
||||
size: 1000000
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -434,10 +391,10 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
|
||||
0x60, // Byte 1 of size
|
||||
0x40, // Byte 1 of size
|
||||
0x42, // Byte 2 of size
|
||||
0x0F, // Byte 3 of size
|
||||
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
|
||||
0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
|
|
@ -504,6 +504,13 @@ where
|
|||
// this value.
|
||||
pos.ins().with_results([into_result]).ireduce(ty, arg)
|
||||
}
|
||||
// ABI argument is a pointer to the value we want.
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
|
||||
pos.ins()
|
||||
.with_results([into_result])
|
||||
.load(ty, MemFlags::new(), arg, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -563,6 +570,18 @@ fn convert_to_abi<PutArg>(
|
|||
let arg = pos.ins().uextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
// Note: This conversion can only happen for call arguments,
|
||||
// so we can allocate the value on stack safely.
|
||||
let stack_slot = pos.func.create_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: ty.bytes(),
|
||||
offset: None,
|
||||
});
|
||||
let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0);
|
||||
pos.ins().store(MemFlags::new(), value, arg, 0);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
}
|
||||
}
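// A plain-Rust model, not taken from this patch, of the Pointer conversion above: the
// caller spills the oversized value into caller-owned storage and passes only its
// address, and the callee reads back through that pointer.
#[derive(Clone, Copy, PartialEq, Debug)]
struct BigValue([u64; 4]); // stands in for a value wider than one register

fn callee(arg: &BigValue) -> u64 {
    arg.0.iter().sum()
}

fn caller() -> u64 {
    let spill = BigValue([1, 2, 3, 4]); // plays the role of the explicit stack slot
    callee(&spill) // only the pointer crosses the call boundary
}

#[test]
fn pointer_argument_round_trips() {
    assert_eq!(caller(), 10);
}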
|
||||
|
||||
|
@ -757,12 +776,6 @@ pub fn handle_call_abi(
|
|||
{
|
||||
legalize_sret_call(isa, pos, sig_ref, inst);
|
||||
} else {
|
||||
// OK, we need to fix the call arguments to match the ABI signature.
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
|
||||
inst = legalize_inst_results(pos, |func, abi_res| {
|
||||
func.dfg.signatures[sig_ref].returns[abi_res]
|
||||
|
@ -770,6 +783,13 @@ pub fn handle_call_abi(
|
|||
}
|
||||
}
|
||||
|
||||
// Go back and fix the call arguments to match the ABI signature.
|
||||
pos.goto_inst(inst);
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
debug_assert!(
|
||||
check_call_signature(&pos.func.dfg, inst).is_ok(),
|
||||
"Signature still wrong: {}, {}{}",
|
||||
|
@ -814,7 +834,12 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
|
|||
pos.use_srcloc(inst);
|
||||
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.signature.returns[abi_arg]
|
||||
let arg = func.signature.returns[abi_arg];
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"Return value cannot be legalized to pointer"
|
||||
);
|
||||
arg
|
||||
});
|
||||
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
|
||||
// the legalized signature. These values should simply be propagated from the entry block
|
||||
|
|
|
@ -35,7 +35,7 @@ mod table;
|
|||
use self::call::expand_call;
|
||||
use self::globalvalue::expand_global_value;
|
||||
use self::heap::expand_heap_addr;
|
||||
use self::libcall::expand_as_libcall;
|
||||
pub(crate) use self::libcall::expand_as_libcall;
|
||||
use self::table::expand_table_addr;
|
||||
|
||||
enum LegalizeInstResult {
|
||||
|
|
|
@ -99,12 +99,12 @@ mod iterators;
|
|||
mod legalizer;
|
||||
mod licm;
|
||||
mod nan_canonicalization;
|
||||
mod num_uses;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod redundant_reload_remover;
|
||||
mod regalloc;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
|
@ -114,6 +114,9 @@ mod topo_order;
|
|||
mod unreachable_code;
|
||||
mod value_label;
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod peepmatic;
|
||||
|
||||
pub use crate::result::{CodegenError, CodegenResult};
|
||||
|
||||
/// Version number of this crate.
|
||||
|
|
|
@ -12,6 +12,15 @@ pub trait ABIBody {
|
|||
/// The instruction type for the ISA associated with this ABI.
|
||||
type I: VCodeInst;
|
||||
|
||||
/// Does the ABI-body code need a temp reg? One will be provided to `init()`
|
||||
/// as the `maybe_tmp` arg if so.
|
||||
fn temp_needed(&self) -> bool;
|
||||
|
||||
/// Initialize. This is called after the ABIBody is constructed because it
|
||||
/// may be provided with a temp vreg, which can only be allocated once the
|
||||
/// lowering context exists.
|
||||
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
|
||||
|
||||
/// Get the settings controlling this function's compilation.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
|
@ -34,6 +43,13 @@ pub trait ABIBody {
|
|||
/// register.
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Generate any setup instruction needed to save values to the
|
||||
/// return-value area. This is usually used when there are multiple return
|
||||
/// values or an otherwise large return value that must be passed on the
|
||||
/// stack; typically the ABI specifies an extra hidden argument that is a
|
||||
/// pointer to that memory.
|
||||
fn gen_retval_area_setup(&self) -> Option<Self::I>;
|
||||
|
||||
/// Generate an instruction which copies a source register to a return value slot.
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
|
@ -98,7 +114,10 @@ pub trait ABIBody {
|
|||
fn gen_epilogue(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Returns the full frame size for the given function, after prologue emission has run. This
|
||||
/// comprises the spill space, incoming argument space, alignment padding, etc.
|
||||
/// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
|
||||
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
|
||||
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
|
||||
/// Baldrdash (SpiderMonkey integration).
|
||||
fn frame_size(&self) -> u32;
|
||||
|
||||
/// Get the spill-slot size.
|
||||
|
@ -132,24 +151,29 @@ pub trait ABICall {
|
|||
/// Get the number of arguments expected.
|
||||
fn num_args(&self) -> usize;
|
||||
|
||||
/// Copy an argument value from a source register, prior to the call.
|
||||
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
/// Emit a copy of an argument value from a source register, prior to the call.
|
||||
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
from_reg: Reg,
|
||||
) -> Vec<Self::I>;
|
||||
);
|
||||
|
||||
/// Copy a return value into a destination register, after the call returns.
|
||||
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
/// Emit a copy of a return value into a destination register, after the call returns.
|
||||
fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
into_reg: Writable<Reg>,
|
||||
);
|
||||
|
||||
/// Pre-adjust the stack, prior to argument copies and call.
|
||||
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
|
||||
/// Emit code to pre-adjust the stack, prior to argument copies and call.
|
||||
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
|
||||
/// Post-adjust the stack, after call return and return-value copies.
|
||||
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
|
||||
/// Emit code to post-adjust the stack, after call return and return-value copies.
|
||||
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
|
||||
/// Generate the call itself.
|
||||
/// Emit the call itself.
|
||||
///
|
||||
/// The returned instruction should have proper use- and def-sets according
|
||||
/// to the argument registers, return-value registers, and clobbered
|
||||
|
@ -159,5 +183,8 @@ pub trait ABICall {
|
|||
/// registers are also logically defs, but should never be read; their
|
||||
/// values are "defined" (to the regalloc) but "undefined" in every other
|
||||
/// sense.)
|
||||
fn gen_call(&self) -> Vec<Self::I>;
|
||||
///
|
||||
/// This function should only be called once, as it is allowed to re-use
|
||||
/// parts of the ABICall object in emitting instructions.
|
||||
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
|
||||
}
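For orientation, a minimal sketch (not part of this patch; the `lower_call_site` wrapper and its `args`/`rets` slices are assumed) of the order in which a lowering backend is expected to drive the renamed `emit_*` methods for one call site:

fn lower_call_site<I: VCodeInst, C: LowerCtx<I = I>, A: ABICall<I = I>>(
    ctx: &mut C,
    abi: &mut A,
    args: &[Reg],
    rets: &[Writable<Reg>],
) {
    // Reserve outgoing-argument stack space before anything else.
    abi.emit_stack_pre_adjust(ctx);
    // Place each argument where the callee's ABI expects it.
    for (i, &arg) in args.iter().enumerate() {
        abi.emit_copy_reg_to_arg(ctx, i, arg);
    }
    // The call itself; its use/def sets cover argument, return-value, and clobbered registers.
    abi.emit_call(ctx);
    // Copy return values out of their ABI-defined locations.
    for (i, &ret) in rets.iter().enumerate() {
        abi.emit_copy_retval_to_reg(ctx, i, ret);
    }
    // Release the outgoing-argument space.
    abi.emit_stack_post_adjust(ctx);
}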
|
||||
|
|
|
@ -10,6 +10,7 @@ use crate::settings::Flags;
|
|||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
use core::any::Any;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use target_lexicon::Triple;
|
||||
|
@ -127,4 +128,8 @@ impl TargetIsa for TargetIsaAdapter {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
self.backend.unsigned_sub_overflow_condition()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,59 +1,624 @@
|
|||
//! Computation of basic block order in emitted code.
|
||||
//!
|
||||
//! This module handles the translation from CLIF BBs to VCode BBs.
|
||||
//!
|
||||
//! The basic idea is that we compute a sequence of "lowered blocks" that
|
||||
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
|
||||
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
|
||||
//! moves for phi-nodes on every block-to-block transfer; these blocks always
|
||||
//! conceptually exist, but may be merged with an "original" CLIF block (and
|
||||
//! hence not actually exist; this is equivalent to inserting the blocks only on
|
||||
//! critical edges).
|
||||
//!
|
||||
//! In other words, starting from a CFG like this (where each "CLIF block" and
|
||||
//! "(edge N->M)" is a separate basic block):
|
||||
//!
|
||||
//! ```plain
|
||||
//!
|
||||
//! CLIF block 0
|
||||
//! / \
|
||||
//! (edge 0->1) (edge 0->2)
|
||||
//! | |
|
||||
//! CLIF block 1 CLIF block 2
|
||||
//! \ /
|
||||
//! (edge 1->3) (edge 2->3)
|
||||
//! \ /
|
||||
//! CLIF block 3
|
||||
//! ```
|
||||
//!
|
||||
//! We can produce a CFG of lowered blocks like so:
|
||||
//!
|
||||
//! ```plain
|
||||
//! +--------------+
|
||||
//! | CLIF block 0 |
|
||||
//! +--------------+
|
||||
//! / \
|
||||
//! +--------------+ +--------------+
|
||||
//! | (edge 0->1) | |(edge 0->2) |
|
||||
//! | CLIF block 1 | | CLIF block 2 |
|
||||
//! +--------------+ +--------------+
|
||||
//! \ /
|
||||
//! +-----------+ +-----------+
|
||||
//! |(edge 1->3)| |(edge 2->3)|
|
||||
//! +-----------+ +-----------+
|
||||
//! \ /
|
||||
//! +------------+
|
||||
//! |CLIF block 3|
|
||||
//! +------------+
|
||||
//! ```
|
||||
//!
|
||||
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
|
||||
//! blocks' original bodies, but the out-edges could not because for simplicity
|
||||
//! in the successor-function definition, we only ever merge an edge onto one
|
||||
//! side of an original CLIF block.)
|
||||
//!
|
||||
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
|
||||
//! block prepended or appended with an edge block (never both, though), or just
|
||||
//! an edge block.
|
||||
//!
|
||||
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
|
||||
//! (never actually materialized, just defined by a "successors" function), and
|
||||
//! compute the reverse postorder.
|
||||
//!
|
||||
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
|
||||
//! example, consider any information about whether edge blocks will actually
|
||||
//! have content, because this computation happens as part of lowering *before*
|
||||
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
|
||||
//! particular edge. But it works relatively well and is conceptually simple.
|
||||
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
|
||||
//! branch editing that in practice elides empty blocks and simplifies some of
|
||||
//! the other redundancies that this scheme produces.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::ir::{Block, Function, Inst, Opcode};
|
||||
use crate::machinst::lower::visit_block_succs;
|
||||
use crate::machinst::*;
|
||||
use regalloc::{BlockIx, Function};
|
||||
|
||||
/// Simple reverse postorder-based block order emission.
|
||||
///
|
||||
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
|
||||
/// construction algorithm.
|
||||
struct BlockRPO {
|
||||
visited: Vec<bool>,
|
||||
postorder: Vec<BlockIndex>,
|
||||
deferred_last: Option<BlockIndex>,
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Mapping from CLIF BBs to VCode BBs.
|
||||
#[derive(Debug)]
|
||||
pub struct BlockLoweringOrder {
|
||||
/// Lowered blocks, in BlockIndex order. Each block is some combination of
|
||||
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
|
||||
/// see [LoweredBlock] for details.
|
||||
lowered_order: Vec<LoweredBlock>,
|
||||
/// Successors for all lowered blocks, in one serialized vector. Indexed by
|
||||
/// the ranges in `lowered_succ_ranges`.
|
||||
lowered_succs: Vec<(Inst, LoweredBlock)>,
|
||||
/// BlockIndex values for successors for all lowered blocks, in the same
|
||||
/// order as `lowered_succs`.
|
||||
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
|
||||
/// Ranges in `lowered_succs` giving the successor lists for each lowered
|
||||
/// block. Indexed by lowering-order index (`BlockIndex`).
|
||||
lowered_succ_ranges: Vec<(usize, usize)>,
|
||||
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
|
||||
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
|
||||
/// blocks.
|
||||
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
||||
}
|
||||
|
||||
impl BlockRPO {
|
||||
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
|
||||
BlockRPO {
|
||||
visited: vec![false; vcode.num_blocks()],
|
||||
postorder: vec![],
|
||||
deferred_last: None,
|
||||
/// The origin of a block in the lowered block-order: either an original CLIF
|
||||
/// block, or an inserted edge-block, or a combination of the two if an edge is
|
||||
/// non-critical.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum LoweredBlock {
|
||||
/// Block in original CLIF, with no merged edge-blocks.
|
||||
Orig {
|
||||
/// Original CLIF block.
|
||||
block: Block,
|
||||
},
|
||||
/// Block in the original CLIF, plus edge-block to one succ (which is the
|
||||
/// one successor of the original block).
|
||||
OrigAndEdge {
|
||||
/// The original CLIF block contained in this lowered block.
|
||||
block: Block,
|
||||
/// The edge (jump) instruction transitioning from this block
|
||||
/// to the next, i.e., corresponding to the included edge-block. This
|
||||
/// will be an instruction in `block`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
/// Block in the original CLIF, preceded by edge-block from one pred (which
|
||||
/// is the one pred of the original block).
|
||||
EdgeAndOrig {
|
||||
/// The previous CLIF block, i.e., the edge block's predecessor.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to the included
|
||||
/// edge-block. This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The original CLIF block included in this lowered block.
|
||||
block: Block,
|
||||
},
|
||||
/// Split critical edge between two CLIF blocks. This lowered block does not
|
||||
/// correspond to any original CLIF blocks; it only serves as an insertion
|
||||
/// point for work to happen on the transition from `pred` to `succ`.
|
||||
Edge {
|
||||
/// The predecessor CLIF block.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to this edge's transition.
|
||||
/// This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
}
|
||||
|
||||
impl LoweredBlock {
|
||||
/// The associated original (CLIF) block included in this lowered block, if
|
||||
/// any.
|
||||
pub fn orig_block(self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::Orig { block, .. }
|
||||
| LoweredBlock::OrigAndEdge { block, .. }
|
||||
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
|
||||
LoweredBlock::Edge { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
|
||||
self.visited[block as usize] = true;
|
||||
for succ in vcode.succs(block) {
|
||||
if !self.visited[*succ as usize] {
|
||||
self.visit(vcode, *succ);
|
||||
/// The associated in-edge, if any.
|
||||
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred,
|
||||
edge_inst,
|
||||
block,
|
||||
} => Some((pred, edge_inst, block)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The associated out-edge, if any. Also includes edge-only blocks.
|
||||
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((block, edge_inst, succ)),
|
||||
LoweredBlock::Edge {
|
||||
pred,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((pred, edge_inst, succ)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLoweringOrder {
|
||||
/// Compute and return a lowered block order for `f`.
|
||||
pub fn new(f: &Function) -> BlockLoweringOrder {
|
||||
debug!("BlockLoweringOrder: function body {:?}", f);
|
||||
|
||||
// Step 1: compute the in-edge and out-edge count of every block.
|
||||
let mut block_in_count = SecondaryMap::with_default(0);
|
||||
let mut block_out_count = SecondaryMap::with_default(0);
|
||||
|
||||
// Cache the block successors to avoid re-examining branches below.
|
||||
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
|
||||
let mut block_succ_range = SecondaryMap::with_default((0, 0));
|
||||
let mut fallthrough_return_block = None;
|
||||
for block in f.layout.blocks() {
|
||||
let block_succ_start = block_succs.len();
|
||||
visit_block_succs(f, block, |inst, succ| {
|
||||
block_out_count[block] += 1;
|
||||
block_in_count[succ] += 1;
|
||||
block_succs.push((inst, succ));
|
||||
});
|
||||
let block_succ_end = block_succs.len();
|
||||
block_succ_range[block] = (block_succ_start, block_succ_end);
|
||||
|
||||
for inst in f.layout.block_likely_branches(block) {
|
||||
if f.dfg[inst].opcode() == Opcode::Return {
|
||||
// Implicit output edge for any return.
|
||||
block_out_count[block] += 1;
|
||||
}
|
||||
if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
|
||||
// Fallthrough return block must come last.
|
||||
debug_assert!(fallthrough_return_block == None);
|
||||
fallthrough_return_block = Some(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Implicit input edge for entry block.
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
block_in_count[entry] += 1;
|
||||
}
|
||||
|
||||
// Here we define the implicit CLIF-plus-edges graph. There are
|
||||
// conceptually two such graphs: the original, with every edge explicit,
|
||||
// and the merged one, with blocks (represented by `LoweredBlock`
|
||||
// values) that contain original CLIF blocks, edges, or both. This
|
||||
// function returns a lowered block's successors as per the latter, with
|
||||
// consideration to edge-block merging.
|
||||
//
|
||||
// Note that there is a property of the block-merging rules below
|
||||
// that is very important to ensure we don't miss any lowered blocks:
|
||||
// any block in the implicit CLIF-plus-edges graph will *only* be
|
||||
// included in one block in the merged graph.
|
||||
//
|
||||
// This, combined with the property that every edge block is reachable
|
||||
// only from one predecessor (and hence cannot be reached by a DFS
|
||||
// backedge), means that it is sufficient in our DFS below to track
|
||||
// visited-bits per original CLIF block only, not per edge. This greatly
|
||||
// simplifies the data structures (no need to keep a sparse hash-set of
|
||||
// (block, block) tuples).
|
||||
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
|
||||
let start_idx = ret.len();
|
||||
match block {
|
||||
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
|
||||
// At an orig block; successors are always edge blocks,
|
||||
// possibly with orig blocks following.
|
||||
let range = block_succ_range[block];
|
||||
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
|
||||
if block_in_count[succ] == 1 {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
block: succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::Edge {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
succ,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
LoweredBlock::Edge {
|
||||
succ, edge_inst, ..
|
||||
}
|
||||
| LoweredBlock::OrigAndEdge {
|
||||
succ, edge_inst, ..
|
||||
} => {
|
||||
// At an edge block; successors are always orig blocks,
|
||||
// possibly with edge blocks following.
|
||||
if block_out_count[succ] == 1 {
|
||||
let range = block_succ_range[succ];
|
||||
// check if the one succ is a real CFG edge (vs.
|
||||
// implicit return succ).
|
||||
if range.1 - range.0 > 0 {
|
||||
debug_assert!(range.1 - range.0 == 1);
|
||||
let (succ_edge_inst, succ_succ) = block_succs[range.0];
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block: succ,
|
||||
edge_inst: succ_edge_inst,
|
||||
succ: succ_succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
}
|
||||
}
|
||||
let end_idx = ret.len();
|
||||
(start_idx, end_idx)
|
||||
};
|
||||
|
||||
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
|
||||
let mut lowered_succs = vec![];
|
||||
let mut lowered_succ_indices = vec![];
|
||||
|
||||
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
|
||||
// explicit stack so we don't overflow the real stack with a deep DFS.
|
||||
#[derive(Debug)]
|
||||
struct StackEntry {
|
||||
this: LoweredBlock,
|
||||
succs: (usize, usize), // range in lowered_succs
|
||||
cur_succ: usize, // index in lowered_succs
|
||||
}
|
||||
|
||||
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
|
||||
let mut visited = FxHashSet::default();
|
||||
let mut postorder = vec![];
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
|
||||
// to not special-case the entry block here.
|
||||
let block = LoweredBlock::Orig { block: entry };
|
||||
visited.insert(block);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, block);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: block,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
|
||||
let mut deferred_last = None;
|
||||
while !stack.is_empty() {
|
||||
let stack_entry = stack.last_mut().unwrap();
|
||||
let range = stack_entry.succs;
|
||||
if stack_entry.cur_succ == range.0 {
|
||||
let orig_block = stack_entry.this.orig_block();
|
||||
if orig_block.is_some() && orig_block == fallthrough_return_block {
|
||||
deferred_last = Some((stack_entry.this, range));
|
||||
} else {
|
||||
postorder.push((stack_entry.this, range));
|
||||
}
|
||||
stack.pop();
|
||||
} else {
|
||||
// Heuristic: chase the children in reverse. This puts the first
|
||||
// successor block first in RPO, all other things being equal,
|
||||
// which tends to prioritize loop backedges over out-edges,
|
||||
// putting the edge-block closer to the loop body and minimizing
|
||||
// live-ranges in linear instruction space.
|
||||
let next = lowered_succs[stack_entry.cur_succ - 1].1;
|
||||
stack_entry.cur_succ -= 1;
|
||||
if visited.contains(&next) {
|
||||
continue;
|
||||
}
|
||||
visited.insert(next);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, next);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: next,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for i in vcode.block_insns(BlockIx::new(block)) {
|
||||
if vcode.get_insn(i).is_epilogue_placeholder() {
|
||||
debug_assert!(self.deferred_last.is_none());
|
||||
self.deferred_last = Some(block);
|
||||
return;
|
||||
postorder.reverse();
|
||||
let mut rpo = postorder;
|
||||
if let Some(d) = deferred_last {
|
||||
rpo.push(d);
|
||||
}
|
||||
|
||||
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
||||
let mut lowered_order = vec![];
|
||||
let mut lowered_succ_ranges = vec![];
|
||||
let mut lb_to_bindex = FxHashMap::default();
|
||||
for (block, succ_range) in rpo.into_iter() {
|
||||
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
|
||||
lowered_order.push(block);
|
||||
lowered_succ_ranges.push(succ_range);
|
||||
}
|
||||
|
||||
let lowered_succ_indices = lowered_succs
|
||||
.iter()
|
||||
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
|
||||
.collect();
|
||||
|
||||
let mut orig_map = SecondaryMap::with_default(None);
|
||||
for (i, lb) in lowered_order.iter().enumerate() {
|
||||
let i = i as BlockIndex;
|
||||
if let Some(b) = lb.orig_block() {
|
||||
orig_map[b] = Some(i);
|
||||
}
|
||||
}
|
||||
|
||||
self.postorder.push(block);
|
||||
let result = BlockLoweringOrder {
|
||||
lowered_order,
|
||||
lowered_succs,
|
||||
lowered_succ_indices,
|
||||
lowered_succ_ranges,
|
||||
orig_map,
|
||||
};
|
||||
debug!("BlockLoweringOrder: {:?}", result);
|
||||
result
|
||||
}
|
||||
|
||||
fn rpo(self) -> Vec<BlockIndex> {
|
||||
let mut rpo = self.postorder;
|
||||
rpo.reverse();
|
||||
if let Some(block) = self.deferred_last {
|
||||
rpo.push(block);
|
||||
/// Get the lowered order of blocks.
|
||||
pub fn lowered_order(&self) -> &[LoweredBlock] {
|
||||
&self.lowered_order[..]
|
||||
}
|
||||
|
||||
/// Get the successors for a lowered block, by index in `lowered_order()`'s
|
||||
/// returned slice. Each successor is paired with the edge-instruction
|
||||
/// (branch) corresponding to this edge.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succs[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the successor indices for a lowered block.
|
||||
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succ_indices[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the lowered block index containing a CLIF block, if any. (May not be
|
||||
/// present if the original CLIF block was unreachable.)
|
||||
pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
|
||||
self.orig_map[bb]
|
||||
}
|
||||
}
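A brief usage sketch (the `func` value is assumed; not part of this patch) showing how these accessors walk the lowered order and its edges:

let order = BlockLoweringOrder::new(&func);
for (i, lb) in order.lowered_order().iter().enumerate() {
    // Each successor comes paired with the CLIF branch instruction that forms the edge.
    for &(branch, succ) in order.succ_indices(i as BlockIndex) {
        println!("lowered block {} ({:?}) --{:?}--> {}", i, lb, branch, succ);
    }
}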
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
||||
use crate::isa::CallConv;
|
||||
|
||||
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
|
||||
assert!(n_blocks > 0);
|
||||
|
||||
let name = ExternalName::testcase("test0");
|
||||
let mut sig = Signature::new(CallConv::SystemV);
|
||||
sig.params.push(AbiParam::new(I32));
|
||||
let mut func = Function::with_name_signature(name, sig);
|
||||
let blocks = (0..n_blocks)
|
||||
.map(|i| {
|
||||
let bb = func.dfg.make_block();
|
||||
assert!(bb.as_u32() == i as u32);
|
||||
bb
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let arg0 = func.dfg.append_block_param(blocks[0], I32);
|
||||
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
|
||||
let mut edge = 0;
|
||||
for i in 0..n_blocks {
|
||||
pos.insert_block(blocks[i]);
|
||||
let mut succs = vec![];
|
||||
while edge < edges.len() && edges[edge].0 == i {
|
||||
succs.push(edges[edge].1);
|
||||
edge += 1;
|
||||
}
|
||||
if succs.len() == 0 {
|
||||
pos.ins().return_(&[arg0]);
|
||||
} else if succs.len() == 1 {
|
||||
pos.ins().jump(blocks[succs[0]], &[]);
|
||||
} else if succs.len() == 2 {
|
||||
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
|
||||
pos.ins().jump(blocks[succs[1]], &[]);
|
||||
} else {
|
||||
panic!("Too many successors");
|
||||
}
|
||||
}
|
||||
rpo
|
||||
|
||||
func
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_diamond() {
|
||||
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 6);
|
||||
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
|
||||
assert!(order.lowered_order[2].orig_block().is_none());
|
||||
assert!(order.lowered_order[2].in_edge().is_none());
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[3].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[5].in_edge().is_none());
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_critedge() {
|
||||
// 0
|
||||
// / \
|
||||
// 1 2
|
||||
// / \ \
|
||||
// 3 4 |
|
||||
// |\ _|____|
|
||||
// | \/ |
|
||||
// | /\ |
|
||||
// 5 6
|
||||
//
|
||||
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
|
||||
//
|
||||
let func = build_test_func(
|
||||
7,
|
||||
&[
|
||||
(0, 1),
|
||||
(0, 2),
|
||||
(1, 3),
|
||||
(1, 4),
|
||||
(2, 5),
|
||||
(3, 5),
|
||||
(3, 6),
|
||||
(4, 6),
|
||||
],
|
||||
);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 11);
|
||||
println!("ordered = {:?}", order.lowered_order);
|
||||
|
||||
// block 0
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
// edge 0->1 + block 1
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
assert!(order.lowered_order[1].out_edge().is_none());
|
||||
|
||||
// edge 1->3 + block 3
|
||||
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
|
||||
assert!(order.lowered_order[2].out_edge().is_none());
|
||||
|
||||
// edge 3->5
|
||||
assert!(order.lowered_order[3].orig_block().is_none());
|
||||
assert!(order.lowered_order[3].in_edge().is_none());
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// edge 3->6
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// edge 1->4 + block 4
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
|
||||
// edge 4->6
|
||||
assert!(order.lowered_order[6].orig_block().is_none());
|
||||
assert!(order.lowered_order[6].in_edge().is_none());
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// block 6
|
||||
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
|
||||
assert!(order.lowered_order[7].in_edge().is_none());
|
||||
assert!(order.lowered_order[7].out_edge().is_none());
|
||||
|
||||
// edge 0->2 + block 2
|
||||
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[8].out_edge().is_none());
|
||||
|
||||
// edge 2->5
|
||||
assert!(order.lowered_order[9].orig_block().is_none());
|
||||
assert!(order.lowered_order[9].in_edge().is_none());
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// block 5
|
||||
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
|
||||
assert!(order.lowered_order[10].in_edge().is_none());
|
||||
assert!(order.lowered_order[10].out_edge().is_none());
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the final block order.
|
||||
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
|
||||
let mut rpo = BlockRPO::new(vcode);
|
||||
rpo.visit(vcode, vcode.entry());
|
||||
rpo.rpo()
|
||||
}
|
||||
|
|
Diff between files not shown because it is too large.
|
@ -6,11 +6,11 @@ use crate::settings;
|
|||
use crate::timing;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{allocate_registers, RegAllocAlgorithm};
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
|
||||
|
||||
/// Compile the given function down to VCode with allocated registers, ready
|
||||
/// for binary emission.
|
||||
pub fn compile<B: LowerBackend>(
|
||||
pub fn compile<B: LowerBackend + MachBackend>(
|
||||
f: &Function,
|
||||
b: &B,
|
||||
abi: Box<dyn ABIBody<I = B::MInst>>,
|
||||
|
@ -18,29 +18,46 @@ pub fn compile<B: LowerBackend>(
|
|||
where
|
||||
B::MInst: ShowWithRRU,
|
||||
{
|
||||
// This lowers the CL IR.
|
||||
let mut vcode = Lower::new(f, abi)?.lower(b)?;
|
||||
// Compute lowered block order.
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
// Build the lowering context.
|
||||
let lower = Lower::new(f, abi, block_order)?;
|
||||
// Lower the IR.
|
||||
let mut vcode = lower.lower(b)?;
|
||||
|
||||
let universe = &B::MInst::reg_universe(vcode.flags());
|
||||
|
||||
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
|
||||
debug!(
|
||||
"vcode from lowering: \n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
);
|
||||
|
||||
// Perform register allocation.
|
||||
let algorithm = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => RegAllocAlgorithm::Backtracking,
|
||||
settings::Regalloc::BacktrackingChecked => RegAllocAlgorithm::BacktrackingChecked,
|
||||
settings::Regalloc::ExperimentalLinearScan => RegAllocAlgorithm::LinearScan,
|
||||
let (run_checker, algorithm) = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
|
||||
settings::Regalloc::BacktrackingChecked => {
|
||||
(true, Algorithm::Backtracking(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScan => {
|
||||
(false, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScanChecked => {
|
||||
(true, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
};
|
||||
|
||||
let result = {
|
||||
let _tt = timing::regalloc();
|
||||
allocate_registers(
|
||||
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
|
||||
allocate_registers_with_opts(
|
||||
&mut vcode,
|
||||
b.reg_universe(),
|
||||
Options {
|
||||
run_checker,
|
||||
algorithm,
|
||||
},
|
||||
)
|
||||
.map_err(|err| {
|
||||
debug!(
|
||||
"Register allocation error for vcode\n{}\nError: {:?}",
|
||||
vcode.show_rru(Some(universe)),
|
||||
vcode.show_rru(Some(b.reg_universe())),
|
||||
err
|
||||
);
|
||||
err
|
||||
|
@ -52,14 +69,9 @@ where
|
|||
// all at once. This also inserts prologues/epilogues.
|
||||
vcode.replace_insns_from_regalloc(result);
|
||||
|
||||
vcode.remove_redundant_branches();
|
||||
|
||||
// Do final passes over code to finalize branches.
|
||||
vcode.finalize_branches();
|
||||
|
||||
debug!(
|
||||
"vcode after regalloc: final version:\n{}",
|
||||
vcode.show_rru(Some(universe))
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
);
|
||||
|
||||
Ok(vcode)
|
||||
|
|
Diff between files not shown because it is too large.
|
@ -109,6 +109,7 @@ use regalloc::RegUsageCollector;
|
|||
use regalloc::{
|
||||
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use std::string::String;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
|
@ -124,8 +125,8 @@ pub mod abi;
|
|||
pub use abi::*;
|
||||
pub mod pretty_print;
|
||||
pub use pretty_print::*;
|
||||
pub mod sections;
|
||||
pub use sections::*;
|
||||
pub mod buffer;
|
||||
pub use buffer::*;
|
||||
pub mod adapter;
|
||||
pub use adapter::*;
|
||||
|
||||
|
@ -137,7 +138,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Map virtual registers to physical registers using the given virt->phys
|
||||
/// maps corresponding to the program points prior to, and after, this instruction.
|
||||
fn map_regs(&mut self, maps: &RegUsageMapper);
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
|
||||
|
||||
/// If this is a simple move, return the (source, destination) tuple of registers.
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
|
||||
|
@ -152,6 +153,9 @@ pub trait MachInst: Clone + Debug {
|
|||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
/// Generate a constant into a reg.
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>;
|
||||
|
||||
/// Generate a zero-length no-op.
|
||||
fn gen_zero_len_nop() -> Self;
|
||||
|
||||
|
@ -166,7 +170,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Generate a jump to another target. Used during lowering of
|
||||
/// control flow.
|
||||
fn gen_jump(target: BlockIndex) -> Self;
|
||||
fn gen_jump(target: MachLabel) -> Self;
|
||||
|
||||
/// Generate a NOP. The `preferred_size` parameter allows the caller to
|
||||
/// request a NOP of that size, or as close to it as possible. The machine
|
||||
|
@ -175,17 +179,6 @@ pub trait MachInst: Clone + Debug {
|
|||
/// the instruction must have a nonzero size.
|
||||
fn gen_nop(preferred_size: usize) -> Self;
|
||||
|
||||
/// Rewrite block targets using the block-target map.
|
||||
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
|
||||
|
||||
/// Finalize branches once the block order (fallthrough) is known.
|
||||
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
|
||||
|
||||
/// Update instruction once block offsets are known. These offsets are
|
||||
/// relative to the beginning of the function. `targets` is indexed by
|
||||
/// BlockIndex.
|
||||
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
|
||||
|
||||
/// Get the register universe for this backend.
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse;
|
||||
|
||||
|
@ -194,6 +187,54 @@ pub trait MachInst: Clone + Debug {
|
|||
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
|
||||
offset
|
||||
}
|
||||
|
||||
/// What is the worst-case instruction size emitted by this instruction type?
|
||||
fn worst_case_size() -> CodeOffset;
|
||||
|
||||
/// A label-use kind: a type that describes the types of label references that
|
||||
/// can occur in an instruction.
|
||||
type LabelUse: MachInstLabelUse;
|
||||
}
|
||||
|
||||
/// A descriptor of a label reference (use) in an instruction set.
|
||||
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
|
||||
/// Required alignment for any veneer. Usually the required instruction
|
||||
/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
|
||||
const ALIGN: CodeOffset;
|
||||
|
||||
/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
|
||||
/// label-reference fixup at offset `x` is valid if the label resolves to `x
|
||||
/// + 1024`.
|
||||
fn max_pos_range(self) -> CodeOffset;
|
||||
/// What is the maximum PC-relative range (negative)? This is the absolute
|
||||
/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
|
||||
/// valid if the label resolves to `x - 1024`.
|
||||
fn max_neg_range(self) -> CodeOffset;
|
||||
/// What is the size of code-buffer slice this label-use needs to patch in
|
||||
/// the label's value?
|
||||
fn patch_size(self) -> CodeOffset;
|
||||
/// Perform a code-patch, given the offset into the buffer of this label use
|
||||
/// and the offset into the buffer of the label's definition.
|
||||
/// It is guaranteed that the label's resolved offset lies within
/// `[use_offset - self.max_neg_range(), use_offset + self.max_pos_range()]`.
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
|
||||
/// Can the label-use be patched to a veneer that supports a longer range?
|
||||
/// Usually valid for jumps (a short-range jump can jump to a longer-range
|
||||
/// jump), but not for e.g. constant pool references, because the constant
|
||||
/// load would require different code (one more level of indirection).
|
||||
fn supports_veneer(self) -> bool;
|
||||
/// How many bytes are needed for a veneer?
|
||||
fn veneer_size(self) -> CodeOffset;
|
||||
/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
|
||||
/// bytes long at offset `veneer_offset` in the buffer. The original
|
||||
/// label-use will be patched to refer to this veneer's offset. A new
|
||||
/// (offset, LabelUse) is returned that allows the veneer to use the actual
|
||||
/// label. For veneers to work properly, it is expected that the new veneer
|
||||
/// has a larger range; on most platforms this probably means either a
|
||||
/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
|
||||
/// stage, a jump that supports a full 32-bit range, for example.
|
||||
fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
|
||||
}
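As a hedged illustration (the `fixup_strategy` helper is assumed, not defined by this patch), the bounds above are meant to be consulted roughly like this when resolving a recorded label use:

fn fixup_strategy<L: MachInstLabelUse>(u: L, use_offset: CodeOffset, label_offset: CodeOffset) -> &'static str {
    // The label may lie before or after the use; compare against the matching bound.
    let in_range = if label_offset >= use_offset {
        label_offset - use_offset <= u.max_pos_range()
    } else {
        use_offset - label_offset <= u.max_neg_range()
    };
    if in_range {
        "patch in place"        // `patch()` can encode the delta directly
    } else if u.supports_veneer() {
        "emit a veneer"         // reserve `veneer_size()` bytes, then patch the use to the veneer
    } else {
        "out of range: cannot resolve this label use"
    }
}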
|
||||
|
||||
/// Describes a block terminator (not call) in the vcode, when its branches
|
||||
|
@ -205,24 +246,26 @@ pub enum MachTerminator<'a> {
|
|||
/// A return instruction.
|
||||
Ret,
|
||||
/// An unconditional branch to another block.
|
||||
Uncond(BlockIndex),
|
||||
Uncond(MachLabel),
|
||||
/// A conditional branch to one of two other blocks.
|
||||
Cond(BlockIndex, BlockIndex),
|
||||
Cond(MachLabel, MachLabel),
|
||||
/// An indirect branch with known possible targets.
|
||||
Indirect(&'a [BlockIndex]),
|
||||
Indirect(&'a [MachLabel]),
|
||||
}
|
||||
|
||||
/// A trait describing the ability to encode a MachInst into binary machine code.
|
||||
pub trait MachInstEmit<O: MachSectionOutput> {
|
||||
pub trait MachInstEmit: MachInst {
|
||||
/// Persistent state carried across `emit` invocations.
|
||||
type State: Default + Clone + Debug;
|
||||
/// Emit the instruction.
|
||||
fn emit(&self, code: &mut O, flags: &Flags);
|
||||
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
|
||||
}
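A short sketch of the intended calling pattern (the `insns`, `buffer`, and `flags` values are assumed): one `State` is created per function and threaded through every `emit` call so instructions can share emission-time bookkeeping.

let mut state = <I as MachInstEmit>::State::default();
for inst in &insns {
    inst.emit(&mut buffer, &flags, &mut state);
}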
|
||||
|
||||
/// The result of a `MachBackend::compile_function()` call. Contains machine
|
||||
/// code (as bytes) and a disassembly, if requested.
|
||||
pub struct MachCompileResult {
|
||||
/// Machine code.
|
||||
pub sections: MachSections,
|
||||
pub buffer: MachBufferFinalized,
|
||||
/// Size of stack frame, in bytes.
|
||||
pub frame_size: u32,
|
||||
/// Disassembly, if requested.
|
||||
|
@ -232,7 +275,7 @@ pub struct MachCompileResult {
|
|||
impl MachCompileResult {
|
||||
/// Get a `CodeInfo` describing section sizes from this compilation result.
|
||||
pub fn code_info(&self) -> CodeInfo {
|
||||
let code_size = self.sections.total_size();
|
||||
let code_size = self.buffer.total_size();
|
||||
CodeInfo {
|
||||
code_size,
|
||||
jumptables_size: 0,
|
||||
|
@ -262,17 +305,13 @@ pub trait MachBackend {
|
|||
fn name(&self) -> &'static str;
|
||||
|
||||
/// Return the register universe for this backend.
|
||||
fn reg_universe(&self) -> RealRegUniverse;
|
||||
fn reg_universe(&self) -> &RealRegUniverse;
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
/// Condition that will be true when an IaddIfcout overflows.
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC;
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
/// Condition that will be true when an IsubIfcout overflows.
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC;
|
||||
}
|
||||
|
|
|
@ -1,460 +0,0 @@
|
|||
//! In-memory representation of compiled machine code, in multiple sections
|
||||
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
|
||||
//! simultaneously, so we buffer the result in memory and hand off to the
|
||||
//! caller at the end of compilation.
|
||||
|
||||
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// A collection of sections with defined start-offsets.
|
||||
pub struct MachSections {
|
||||
/// Sections, in offset order.
|
||||
pub sections: Vec<MachSection>,
|
||||
}
|
||||
|
||||
impl MachSections {
|
||||
/// New, empty set of sections.
|
||||
pub fn new() -> MachSections {
|
||||
MachSections { sections: vec![] }
|
||||
}
|
||||
|
||||
/// Add a section with a known offset and size. Returns the index.
|
||||
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
|
||||
let idx = self.sections.len();
|
||||
self.sections.push(MachSection::new(start, length));
|
||||
idx
|
||||
}
|
||||
|
||||
/// Mutably borrow the given section by index.
|
||||
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
|
||||
&mut self.sections[idx]
|
||||
}
|
||||
|
||||
/// Get mutable borrows of two sections simultaneously. Used during
|
||||
/// instruction emission to provide references to the .text and .rodata
|
||||
/// (constant pool) sections.
|
||||
pub fn two_sections<'a>(
|
||||
&'a mut self,
|
||||
idx1: usize,
|
||||
idx2: usize,
|
||||
) -> (&'a mut MachSection, &'a mut MachSection) {
|
||||
assert!(idx1 < idx2);
|
||||
assert!(idx1 < self.sections.len());
|
||||
assert!(idx2 < self.sections.len());
|
||||
let (first, rest) = self.sections.split_at_mut(idx2);
|
||||
(&mut first[idx1], &mut rest[0])
|
||||
}
|
||||
|
||||
/// Emit this set of sections to a set of sinks for the code,
|
||||
/// relocations, traps, and stackmap.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
// N.B.: we emit every section into the .text section as far as
|
||||
// the `CodeSink` is concerned; we do not bother to segregate
|
||||
// the contents into the actual program text, the jumptable and the
|
||||
// rodata (constant pool). This allows us to generate code assuming
|
||||
// that these will not be relocated relative to each other, and avoids
|
||||
// having to designate each section as belonging in one of the three
|
||||
// fixed categories defined by `CodeSink`. If this becomes a problem
|
||||
// later (e.g. because of memory permissions or similar), we can
|
||||
// add this designation and segregate the output; take care, however,
|
||||
// to add the appropriate relocations in this case.
|
||||
|
||||
for section in &self.sections {
|
||||
if section.data.len() > 0 {
|
||||
while sink.offset() < section.start_offset {
|
||||
sink.put1(0);
|
||||
}
|
||||
section.emit(sink);
|
||||
}
|
||||
}
|
||||
sink.begin_jumptables();
|
||||
sink.begin_rodata();
|
||||
sink.end_codegen();
|
||||
}
|
||||
|
||||
/// Get a list of source location mapping tuples in sorted-by-start-offset order.
|
||||
pub fn get_srclocs_sorted<'a>(&'a self) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs::new(&self.sections)
|
||||
}
|
||||
|
||||
/// Get the total required size for these sections.
|
||||
pub fn total_size(&self) -> CodeOffset {
|
||||
if self.sections.len() == 0 {
|
||||
0
|
||||
} else {
|
||||
// Find the last non-empty section.
|
||||
self.sections
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| s.data.len() > 0)
|
||||
.map(|s| s.cur_offset_from_start())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the srclocs in each section.
|
||||
/// Returns MachSrcLocs in an order sorted by start location.
|
||||
pub struct MachSectionsSrcLocs<'a> {
|
||||
sections: &'a [MachSection],
|
||||
cur_section: usize,
|
||||
cur_srcloc: usize,
|
||||
// For validation:
|
||||
last_offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl<'a> MachSectionsSrcLocs<'a> {
|
||||
fn new(sections: &'a [MachSection]) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs {
|
||||
sections,
|
||||
cur_section: 0,
|
||||
cur_srcloc: 0,
|
||||
last_offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MachSectionsSrcLocs<'a> {
|
||||
type Item = &'a MachSrcLoc;
|
||||
|
||||
fn next(&mut self) -> Option<&'a MachSrcLoc> {
|
||||
// We simply iterate through sections and srcloc records in order. This produces a
|
||||
// sorted order naturally because sections are in starting-offset-order, and srclocs
|
||||
// are produced as a section is emitted into, so are in order as well.
|
||||
|
||||
// If we're out of sections, we're done.
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Otherwise, make sure we have a srcloc in the current section left to return, and
|
||||
// advance to the next section if not. Done if we run out of sections.
|
||||
while self.cur_srcloc >= self.sections[self.cur_section].srclocs.len() {
|
||||
self.cur_srcloc = 0;
|
||||
self.cur_section += 1;
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let loc = &self.sections[self.cur_section].srclocs[self.cur_srcloc];
|
||||
self.cur_srcloc += 1;
|
||||
debug_assert!(loc.start >= self.last_offset);
|
||||
self.last_offset = loc.start;
|
||||
Some(loc)
|
||||
}
|
||||
}
|
||||
|
||||
/// An abstraction over MachSection and MachSectionSize: some
|
||||
/// receiver of section data.
|
||||
pub trait MachSectionOutput {
|
||||
/// Get the current offset from the start of all sections.
|
||||
fn cur_offset_from_start(&self) -> CodeOffset;
|
||||
|
||||
/// Get the start offset of this section.
|
||||
fn start_offset(&self) -> CodeOffset;
|
||||
|
||||
/// Add 1 byte to the section.
|
||||
fn put1(&mut self, _: u8);
|
||||
|
||||
/// Add 2 bytes to the section.
|
||||
fn put2(&mut self, value: u16) {
|
||||
let [b0, b1] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
}
|
||||
|
||||
/// Add 4 bytes to the section.
|
||||
fn put4(&mut self, value: u32) {
|
||||
let [b0, b1, b2, b3] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
}
|
||||
|
||||
/// Add 8 bytes to the section.
|
||||
fn put8(&mut self, value: u64) {
|
||||
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
self.put1(b4);
|
||||
self.put1(b5);
|
||||
self.put1(b6);
|
||||
self.put1(b7);
|
||||
}
|
||||
|
||||
/// Add a slice of bytes to the section.
|
||||
fn put_data(&mut self, data: &[u8]);
|
||||
|
||||
/// Add a relocation at the current offset.
|
||||
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
|
||||
|
||||
/// Add a trap record at the current offset.
|
||||
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
|
||||
|
||||
/// Add a call return address record at the current offset.
|
||||
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
|
||||
|
||||
/// Start the output for the given source-location at the current offset.
|
||||
fn start_srcloc(&mut self, loc: SourceLoc);
|
||||
|
||||
/// End the output for the previously-given source-location at the current offset.
|
||||
fn end_srcloc(&mut self);
|
||||
|
||||
/// Align up to the given alignment.
|
||||
fn align_to(&mut self, align_to: CodeOffset) {
|
||||
assert!(align_to.is_power_of_two());
|
||||
while self.cur_offset_from_start() & (align_to - 1) != 0 {
|
||||
self.put1(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
|
||||
/// Multiple sections may be created with known start offsets in advance; the
|
||||
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
|
||||
/// once, after computing the length of the code, so that constant references
|
||||
/// can use known offsets as instructions are emitted.
|
||||
pub struct MachSection {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The limit of this section, defined by the start of the next section.
|
||||
pub length_limit: CodeOffset,
|
||||
/// The section contents, as raw bytes.
|
||||
pub data: Vec<u8>,
|
||||
/// Any relocations referring to this section.
|
||||
pub relocs: Vec<MachReloc>,
|
||||
/// Any trap records referring to this section.
|
||||
pub traps: Vec<MachTrap>,
|
||||
/// Any call site records referring to this section.
|
||||
pub call_sites: Vec<MachCallSite>,
|
||||
/// Any source location mappings referring to this section.
|
||||
pub srclocs: Vec<MachSrcLoc>,
|
||||
/// The current source location in progress (after `start_srcloc()` and before `end_srcloc()`).
|
||||
/// This is a (start_offset, src_loc) tuple.
|
||||
pub cur_srcloc: Option<(CodeOffset, SourceLoc)>,
|
||||
}
|
||||
|
||||
impl MachSection {
|
||||
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
|
||||
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
|
||||
MachSection {
|
||||
start_offset,
|
||||
length_limit,
|
||||
data: vec![],
|
||||
relocs: vec![],
|
||||
traps: vec![],
|
||||
call_sites: vec![],
|
||||
srclocs: vec![],
|
||||
cur_srcloc: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit this section to the CodeSink and other associated sinks. The
|
||||
/// current offset of the CodeSink must match the starting offset of this
|
||||
/// section.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
assert!(sink.offset() == self.start_offset);
|
||||
|
||||
let mut next_reloc = 0;
|
||||
let mut next_trap = 0;
|
||||
let mut next_call_site = 0;
|
||||
for (idx, byte) in self.data.iter().enumerate() {
|
||||
if next_reloc < self.relocs.len() {
|
||||
let reloc = &self.relocs[next_reloc];
|
||||
if reloc.offset == idx as CodeOffset {
|
||||
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
|
||||
next_reloc += 1;
|
||||
}
|
||||
}
|
||||
if next_trap < self.traps.len() {
|
||||
let trap = &self.traps[next_trap];
|
||||
if trap.offset == idx as CodeOffset {
|
||||
sink.trap(trap.code, trap.srcloc);
|
||||
next_trap += 1;
|
||||
}
|
||||
}
|
||||
if next_call_site < self.call_sites.len() {
|
||||
let call_site = &self.call_sites[next_call_site];
|
||||
if call_site.ret_addr == idx as CodeOffset {
|
||||
sink.add_call_site(call_site.opcode, call_site.srcloc);
|
||||
next_call_site += 1;
|
||||
}
|
||||
}
|
||||
sink.put1(*byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSection {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
self.start_offset + self.data.len() as CodeOffset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, value: u8) {
|
||||
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
|
||||
self.data.push(value);
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
|
||||
self.data.extend_from_slice(data);
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
|
||||
let name = name.clone();
|
||||
self.relocs.push(MachReloc {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
kind,
|
||||
name,
|
||||
addend,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
|
||||
self.traps.push(MachTrap {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
code,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
|
||||
self.call_sites.push(MachCallSite {
|
||||
ret_addr: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
opcode,
|
||||
});
|
||||
}
|
||||
|
||||
fn start_srcloc(&mut self, loc: SourceLoc) {
|
||||
self.cur_srcloc = Some((self.cur_offset_from_start(), loc));
|
||||
}
|
||||
|
||||
fn end_srcloc(&mut self) {
|
||||
let (start, loc) = self
|
||||
.cur_srcloc
|
||||
.take()
|
||||
.expect("end_srcloc() called without start_srcloc()");
|
||||
let end = self.cur_offset_from_start();
|
||||
// Skip zero-length extends.
|
||||
debug_assert!(end >= start);
|
||||
if end > start {
|
||||
self.srclocs.push(MachSrcLoc { start, end, loc });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A MachSectionOutput implementation that records only size.
|
||||
pub struct MachSectionSize {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The current offset of this section.
|
||||
pub offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl MachSectionSize {
|
||||
/// Create a new size-counting dummy section.
|
||||
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
|
||||
MachSectionSize {
|
||||
start_offset,
|
||||
offset: start_offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the size this section would take if emitted with a real sink.
|
||||
pub fn size(&self) -> CodeOffset {
|
||||
self.offset - self.start_offset
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSectionSize {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
// All size-counting sections conceptually start at offset 0; this doesn't
|
||||
// matter when counting code size.
|
||||
self.offset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, _: u8) {
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
self.offset += data.len() as CodeOffset;
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
|
||||
|
||||
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
|
||||
|
||||
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
|
||||
|
||||
fn start_srcloc(&mut self, _: SourceLoc) {}
|
||||
|
||||
fn end_srcloc(&mut self) {}
|
||||
}
|
||||
|
||||
/// A relocation resulting from a compilation.
|
||||
pub struct MachReloc {
|
||||
/// The offset at which the relocation applies, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The kind of relocation.
|
||||
pub kind: Reloc,
|
||||
/// The external symbol / name to which this relocation refers.
|
||||
pub name: ExternalName,
|
||||
/// The addend to add to the symbol value.
|
||||
pub addend: i64,
|
||||
}
|
||||
|
||||
/// A trap record resulting from a compilation.
|
||||
pub struct MachTrap {
|
||||
/// The offset at which the trap instruction occurs, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The trap code.
|
||||
pub code: TrapCode,
|
||||
}
|
||||
|
||||
/// A call site record resulting from a compilation.
|
||||
pub struct MachCallSite {
|
||||
/// The offset of the call's return address, *relative to the containing section*.
|
||||
pub ret_addr: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The call's opcode.
|
||||
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// A source-location mapping resulting from a compilation.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MachSrcLoc {
|
||||
/// The start of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub start: CodeOffset,
|
||||
/// The end of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub end: CodeOffset,
|
||||
/// The source location.
|
||||
pub loc: SourceLoc,
|
||||
}
|
|
@ -17,9 +17,7 @@
|
|||
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
||||
//! backend pipeline.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir;
|
||||
use crate::ir::SourceLoc;
|
||||
use crate::ir::{self, SourceLoc};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
|
@ -30,9 +28,7 @@ use regalloc::{
|
|||
};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
use alloc::{borrow::Cow, vec::Vec};
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::string::String;
|
||||
|
@ -44,8 +40,8 @@ pub type BlockIndex = u32;
|
|||
|
||||
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
|
||||
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
|
||||
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
|
||||
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
|
||||
pub trait VCodeInst: MachInst + MachInstEmit {}
|
||||
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
|
||||
|
||||
/// A function in "VCode" (virtualized-register code) form, after lowering.
|
||||
/// This is essentially a standard CFG of basic blocks, where each basic block
|
||||
|
@ -79,25 +75,10 @@ pub struct VCode<I: VCodeInst> {
|
|||
/// Block successor lists, concatenated into one Vec. The `block_succ_range`
|
||||
/// list of tuples above gives (start, end) ranges within this list that
|
||||
/// correspond to each basic block's successors.
|
||||
block_succs: Vec<BlockIndex>,
|
||||
block_succs: Vec<BlockIx>,
|
||||
|
||||
/// Block indices by IR block.
|
||||
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
|
||||
|
||||
/// IR block for each VCode Block. The length of this Vec will likely be
|
||||
/// less than the total number of Blocks, because new Blocks (for edge
|
||||
/// splits, for example) are appended during lowering.
|
||||
bb_by_block: Vec<ir::Block>,
|
||||
|
||||
/// Order of block IDs in final generated code.
|
||||
final_block_order: Vec<BlockIndex>,
|
||||
|
||||
/// Final block offsets. Computed during branch finalization and used
|
||||
/// during emission.
|
||||
final_block_offsets: Vec<CodeOffset>,
|
||||
|
||||
/// Size of code, accounting for block layout / alignment.
|
||||
code_size: CodeOffset,
|
||||
/// Block-order information.
|
||||
block_order: BlockLoweringOrder,
|
||||
|
||||
/// ABI object.
|
||||
abi: Box<dyn ABIBody<I = I>>,
|
||||
|
@ -121,12 +102,8 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
/// In-progress VCode.
|
||||
vcode: VCode<I>,
|
||||
|
||||
/// Current basic block instructions, in reverse order (because blocks are
|
||||
/// built bottom-to-top).
|
||||
bb_insns: SmallVec<[(I, SourceLoc); 32]>,
|
||||
|
||||
/// Current IR-inst instructions, in forward order.
|
||||
ir_inst_insns: SmallVec<[(I, SourceLoc); 4]>,
|
||||
/// Index of the last block-start in the vcode.
|
||||
block_start: InsnIndex,
|
||||
|
||||
/// Start of succs for the current block in the concatenated succs list.
|
||||
succ_start: usize,
|
||||
|
@ -137,12 +114,11 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
|
||||
impl<I: VCodeInst> VCodeBuilder<I> {
|
||||
/// Create a new VCodeBuilder.
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi);
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi, block_order);
|
||||
VCodeBuilder {
|
||||
vcode,
|
||||
bb_insns: SmallVec::new(),
|
||||
ir_inst_insns: SmallVec::new(),
|
||||
block_start: 0,
|
||||
succ_start: 0,
|
||||
cur_srcloc: SourceLoc::default(),
|
||||
}
|
||||
|
@ -153,6 +129,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
&mut *self.vcode.abi
|
||||
}
|
||||
|
||||
/// Access to the BlockLoweringOrder object.
|
||||
pub fn block_order(&self) -> &BlockLoweringOrder {
|
||||
&self.vcode.block_order
|
||||
}
|
||||
|
||||
/// Set the type of a VReg.
|
||||
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
|
||||
while self.vcode.vreg_types.len() <= vreg.get_index() {
|
||||
|
@ -161,53 +142,17 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
self.vcode.vreg_types[vreg.get_index()] = ty;
|
||||
}
|
||||
|
||||
/// Return the underlying bb-to-BlockIndex map.
|
||||
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
|
||||
&self.vcode.block_by_bb
|
||||
}
|
||||
|
||||
/// Initialize the bb-to-BlockIndex map. Returns the first free
|
||||
/// BlockIndex.
|
||||
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
|
||||
let mut bindex: BlockIndex = 0;
|
||||
for bb in blocks.iter() {
|
||||
self.vcode.block_by_bb[*bb] = bindex;
|
||||
self.vcode.bb_by_block.push(*bb);
|
||||
bindex += 1;
|
||||
}
|
||||
bindex
|
||||
}
|
||||
|
||||
/// Get the BlockIndex for an IR block.
|
||||
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
|
||||
self.vcode.block_by_bb[bb]
|
||||
}
|
||||
|
||||
/// Set the current block as the entry block.
|
||||
pub fn set_entry(&mut self, block: BlockIndex) {
|
||||
self.vcode.entry = block;
|
||||
}
|
||||
|
||||
/// End the current IR instruction. Must be called after pushing any
|
||||
/// instructions and prior to ending the basic block.
|
||||
pub fn end_ir_inst(&mut self) {
|
||||
while let Some(pair) = self.ir_inst_insns.pop() {
|
||||
self.bb_insns.push(pair);
|
||||
}
|
||||
}
|
||||
|
||||
/// End the current basic block. Must be called after emitting vcode insts
|
||||
/// for IR insts and prior to ending the function (building the VCode).
|
||||
pub fn end_bb(&mut self) -> BlockIndex {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
let block_num = self.vcode.block_ranges.len() as BlockIndex;
|
||||
// Push the instructions.
|
||||
let start_idx = self.vcode.insts.len() as InsnIndex;
|
||||
while let Some((i, loc)) = self.bb_insns.pop() {
|
||||
self.vcode.insts.push(i);
|
||||
self.vcode.srclocs.push(loc);
|
||||
}
|
||||
pub fn end_bb(&mut self) {
|
||||
let start_idx = self.block_start;
|
||||
let end_idx = self.vcode.insts.len() as InsnIndex;
|
||||
self.block_start = end_idx;
|
||||
// Add the instruction index range to the list of blocks.
|
||||
self.vcode.block_ranges.push((start_idx, end_idx));
|
||||
// End the successors list.
|
||||
|
@ -216,8 +161,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
.block_succ_range
|
||||
.push((self.succ_start, succ_end));
|
||||
self.succ_start = succ_end;
|
||||
|
||||
block_num
|
||||
}
|
||||
|
||||
/// Push an instruction for the current BB and current IR inst within the BB.
|
||||
|
@ -225,19 +168,27 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
match insn.is_term() {
|
||||
MachTerminator::None | MachTerminator::Ret => {}
|
||||
MachTerminator::Uncond(target) => {
|
||||
self.vcode.block_succs.push(target);
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
}
|
||||
MachTerminator::Cond(true_branch, false_branch) => {
|
||||
self.vcode.block_succs.push(true_branch);
|
||||
self.vcode.block_succs.push(false_branch);
|
||||
self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
|
||||
self.vcode
|
||||
.block_succs
|
||||
.push(BlockIx::new(false_branch.get()));
|
||||
}
|
||||
MachTerminator::Indirect(targets) => {
|
||||
for target in targets {
|
||||
self.vcode.block_succs.push(*target);
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
self.ir_inst_insns.push((insn, self.cur_srcloc));
|
||||
self.vcode.insts.push(insn);
|
||||
self.vcode.srclocs.push(self.cur_srcloc);
|
||||
}
|
||||
|
||||
/// Get the current source location.
|
||||
pub fn get_srcloc(&self) -> SourceLoc {
|
||||
self.cur_srcloc
|
||||
}
|
||||
|
||||
/// Set the current source location.
|
||||
|
@ -247,8 +198,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
|
||||
/// Build the final VCode.
|
||||
pub fn build(self) -> VCode<I> {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
assert!(self.bb_insns.is_empty());
|
||||
self.vcode
|
||||
}
|
||||
}
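With this change the builder no longer buffers instructions per IR inst and per block (`ir_inst_insns`/`bb_insns`); `push_insn` appends straight to `vcode.insts`, and `end_bb` simply records the `(start, end)` index range since the last block boundary. A self-contained mimic of that range-recording scheme (simplified: no srclocs or successor lists, and a `u8` standing in for a machine instruction):

// Illustrative only; MiniBuilder is not Cranelift's VCodeBuilder.
struct MiniBuilder {
    insts: Vec<u8>,
    block_ranges: Vec<(u32, u32)>,
    block_start: u32,
}

impl MiniBuilder {
    fn new() -> Self {
        MiniBuilder { insts: vec![], block_ranges: vec![], block_start: 0 }
    }

    // Instructions go straight into the flat list, in lowering order.
    fn push_insn(&mut self, inst: u8) {
        self.insts.push(inst);
    }

    // Ending a block records the index range since the last block start.
    fn end_bb(&mut self) {
        let end = self.insts.len() as u32;
        self.block_ranges.push((self.block_start, end));
        self.block_start = end;
    }
}

fn main() {
    let mut b = MiniBuilder::new();
    b.push_insn(1);
    b.push_insn(2);
    b.end_bb(); // block 0 covers insts [0, 2)
    b.push_insn(3);
    b.end_bb(); // block 1 covers insts [2, 3)
    assert_eq!(b.block_ranges, vec![(0u32, 2u32), (2, 3)]);
}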
|
||||
|
@ -270,35 +219,9 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
|
||||
let range = vcode.block_insns(BlockIx::new(block));
|
||||
|
||||
debug!(
|
||||
"is_trivial_jump_block: block {} has len {}",
|
||||
block,
|
||||
range.len()
|
||||
);
|
||||
|
||||
if range.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
let insn = range.first();
|
||||
|
||||
debug!(
|
||||
" -> only insn is: {:?} with terminator {:?}",
|
||||
vcode.get_insn(insn),
|
||||
vcode.get_insn(insn).is_term()
|
||||
);
|
||||
|
||||
match vcode.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(target) => Some(target),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: VCodeInst> VCode<I> {
|
||||
/// New empty VCode.
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
|
||||
VCode {
|
||||
liveins: abi.liveins(),
|
||||
liveouts: abi.liveouts(),
|
||||
|
@ -309,11 +232,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
block_ranges: vec![],
|
||||
block_succ_range: vec![],
|
||||
block_succs: vec![],
|
||||
block_by_bb: SecondaryMap::with_default(0),
|
||||
bb_by_block: vec![],
|
||||
final_block_order: vec![],
|
||||
final_block_offsets: vec![],
|
||||
code_size: 0,
|
||||
block_order,
|
||||
abi,
|
||||
}
|
||||
}
|
||||
|
@ -345,7 +264,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
/// Get the successors for a block.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
|
||||
let (start, end) = self.block_succ_range[block as usize];
|
||||
&self.block_succs[start..end]
|
||||
}
|
||||
|
@ -354,8 +273,6 @@ impl<I: VCodeInst> VCode<I> {
|
|||
/// instructions including spliced fill/reload/move instructions, and replace
|
||||
/// the VCode with them.
|
||||
pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
|
||||
self.final_block_order = compute_final_block_order(self);
|
||||
|
||||
// Record the spillslot count and clobbered registers for the ABI/stack
|
||||
// setup code.
|
||||
self.abi.set_num_spillslots(result.num_spill_slots as usize);
|
||||
|
@ -370,11 +287,12 @@ impl<I: VCodeInst> VCode<I> {
|
|||
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
|
||||
let mut final_srclocs = vec![];
|
||||
|
||||
for block in &self.final_block_order {
|
||||
let (start, end) = block_ranges[*block as usize];
|
||||
for block in 0..self.num_blocks() {
|
||||
let block = block as BlockIndex;
|
||||
let (start, end) = block_ranges[block as usize];
|
||||
let final_start = final_insns.len() as InsnIndex;
|
||||
|
||||
if *block == self.entry {
|
||||
if block == self.entry {
|
||||
// Start with the prologue.
|
||||
let prologue = self.abi.gen_prologue();
|
||||
let len = prologue.len();
|
||||
|
@ -416,7 +334,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
let final_end = final_insns.len() as InsnIndex;
|
||||
final_block_ranges[*block as usize] = (final_start, final_end);
|
||||
final_block_ranges[block as usize] = (final_start, final_end);
|
||||
}
|
||||
|
||||
debug_assert!(final_insns.len() == final_srclocs.len());
|
||||
|
@ -426,174 +344,68 @@ impl<I: VCodeInst> VCode<I> {
|
|||
self.block_ranges = final_block_ranges;
|
||||
}
|
||||
|
||||
/// Removes redundant branches, rewriting targets to point directly to the
|
||||
/// ultimate block at the end of a chain of trivial one-target jumps.
|
||||
pub fn remove_redundant_branches(&mut self) {
|
||||
// For each block, compute the actual target block, looking through up to one
|
||||
// block with single-target jumps (this will remove empty edge blocks inserted
|
||||
// by phi-lowering).
|
||||
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
|
||||
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
|
||||
.collect();
|
||||
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
|
||||
|
||||
debug!(
|
||||
"remove_redundant_branches: block_rewrites = {:?}",
|
||||
block_rewrites
|
||||
);
|
||||
|
||||
refcounts[self.entry as usize] = 1;
|
||||
|
||||
for block in 0..self.num_blocks() as u32 {
|
||||
for insn in self.block_insns(BlockIx::new(block)) {
|
||||
self.get_insn_mut(insn)
|
||||
.with_block_rewrites(&block_rewrites[..]);
|
||||
match self.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(bix) => {
|
||||
refcounts[bix as usize] += 1;
|
||||
}
|
||||
MachTerminator::Cond(bix1, bix2) => {
|
||||
refcounts[bix1 as usize] += 1;
|
||||
refcounts[bix2 as usize] += 1;
|
||||
}
|
||||
MachTerminator::Indirect(blocks) => {
|
||||
for block in blocks {
|
||||
refcounts[*block as usize] += 1;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
|
||||
|
||||
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
|
||||
self.final_block_order = block_order
|
||||
.into_iter()
|
||||
.filter(|b| !deleted[*b as usize])
|
||||
.collect();
|
||||
|
||||
// Rewrite successor information based on the block-rewrite map.
|
||||
for succ in &mut self.block_succs {
|
||||
let new_succ = block_rewrites[*succ as usize];
|
||||
*succ = new_succ;
|
||||
}
|
||||
}
|
||||
|
||||
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
|
||||
/// depending on fallthrough; and (ii) use concrete offsets.
|
||||
pub fn finalize_branches(&mut self)
|
||||
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
|
||||
/// reloc/trap/etc. records ready for use.
|
||||
pub fn emit(&self) -> MachBuffer<I>
|
||||
where
|
||||
I: MachInstEmit<MachSectionSize>,
|
||||
I: MachInstEmit,
|
||||
{
|
||||
// Compute fallthrough block, indexed by block.
|
||||
let num_final_blocks = self.final_block_order.len();
|
||||
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
|
||||
for i in 0..(num_final_blocks - 1) {
|
||||
let from = self.final_block_order[i];
|
||||
let to = self.final_block_order[i + 1];
|
||||
block_fallthrough[from as usize] = Some(to);
|
||||
}
|
||||
let mut buffer = MachBuffer::new();
|
||||
let mut state = Default::default();
|
||||
|
||||
// Pass over VCode instructions and finalize two-way branches into
|
||||
// one-way branches with fallthrough.
|
||||
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = None;
|
||||
for block in 0..self.num_blocks() {
|
||||
let next_block = block_fallthrough[block];
|
||||
let (start, end) = self.block_ranges[block];
|
||||
|
||||
for iix in start..end {
|
||||
let insn = &mut self.insts[iix as usize];
|
||||
insn.with_fallthrough_block(next_block);
|
||||
}
|
||||
}
|
||||
|
||||
let flags = self.abi.flags();
|
||||
|
||||
// Compute block offsets.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
let mut block_offsets = vec![0; self.num_blocks()];
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
block_offsets[block as usize] = code_section.offset;
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
|
||||
// We now have the section layout.
|
||||
self.final_block_offsets = block_offsets;
|
||||
self.code_size = code_section.size();
|
||||
|
||||
// Update branches with known block offsets. This looks like the
|
||||
// traversal above, but (i) does not update block_offsets, rather uses
|
||||
// it (so forward references are now possible), and (ii) mutates the
|
||||
// instructions.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize]
|
||||
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the instructions to a list of sections.
|
||||
pub fn emit(&self) -> MachSections
|
||||
where
|
||||
I: MachInstEmit<MachSection>,
|
||||
{
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, self.code_size);
|
||||
let code_section = sections.get_section(code_idx);
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = SourceLoc::default();
|
||||
for &block in &self.final_block_order {
|
||||
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
|
||||
while new_offset > code_section.cur_offset_from_start() {
|
||||
let block = block as BlockIndex;
|
||||
let new_offset = I::align_basic_block(buffer.cur_offset());
|
||||
while new_offset > buffer.cur_offset() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
|
||||
nop.emit(code_section, flags);
|
||||
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
|
||||
nop.emit(&mut buffer, flags, &mut Default::default());
|
||||
}
|
||||
assert_eq!(code_section.cur_offset_from_start(), new_offset);
|
||||
assert_eq!(buffer.cur_offset(), new_offset);
|
||||
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
buffer.bind_label(MachLabel::from_block(block));
|
||||
for iix in start..end {
|
||||
let srcloc = self.srclocs[iix as usize];
|
||||
if srcloc != cur_srcloc {
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
if cur_srcloc != Some(srcloc) {
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
}
|
||||
if !srcloc.is_default() {
|
||||
code_section.start_srcloc(srcloc);
|
||||
}
|
||||
cur_srcloc = srcloc;
|
||||
buffer.start_srcloc(srcloc);
|
||||
cur_srcloc = Some(srcloc);
|
||||
}
|
||||
|
||||
self.insts[iix as usize].emit(code_section, flags);
|
||||
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
|
||||
}
|
||||
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
cur_srcloc = SourceLoc::default();
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
cur_srcloc = None;
|
||||
}
|
||||
|
||||
// Do we need an island? Get the worst-case size of the next BB and see if, having
|
||||
// emitted that many bytes, we will be beyond the deadline.
|
||||
if block < (self.num_blocks() - 1) as BlockIndex {
|
||||
let next_block = block + 1;
|
||||
let next_block_range = self.block_ranges[next_block as usize];
|
||||
let next_block_size = next_block_range.1 - next_block_range.0;
|
||||
let worst_case_next_bb = I::worst_case_size() * next_block_size;
|
||||
if buffer.island_needed(worst_case_next_bb) {
|
||||
buffer.emit_island();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sections
|
||||
buffer
|
||||
}
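The island check near the end of the loop above is a deadline computation: before starting the next block, the emitter multiplies that block's instruction count by `I::worst_case_size()` and asks the buffer whether emitting that many bytes could push a pending branch fixup out of range; if so, it emits an island first. A toy version of the arithmetic, with made-up sizes and a made-up 4 KiB branch range rather than real `MachBuffer` state:

// Illustrative-only numbers; in Cranelift the deadline comes from the buffer's
// pending fixups and each ISA's maximum branch reach.
fn island_needed(cur_offset: u32, worst_case_next_bb: u32, nearest_deadline: u32) -> bool {
    cur_offset + worst_case_next_bb > nearest_deadline
}

fn main() {
    let worst_case_size_per_inst = 8u32; // assumed per-inst upper bound
    let next_block_insts = 500u32;
    let worst_case_next_bb = worst_case_size_per_inst * next_block_insts;
    // Suppose a pending conditional branch can only reach 4 KiB forward.
    let deadline = 4096u32;
    assert!(island_needed(1024, worst_case_next_bb, deadline));
    assert!(!island_needed(0, 8 * 10, deadline));
}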
|
||||
|
||||
/// Get the IR block for a BlockIndex, if one exists.
|
||||
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
|
||||
if (block as usize) < self.bb_by_block.len() {
|
||||
Some(self.bb_by_block[block as usize])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
self.block_order.lowered_order()[block as usize].orig_block()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -629,13 +441,9 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
Range::new(InstIx::new(start), (end - start) as usize)
|
||||
}
|
||||
|
||||
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
|
||||
fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
|
||||
let (start, end) = self.block_succ_range[block.get() as usize];
|
||||
self.block_succs[start..end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(BlockIx::new)
|
||||
.collect()
|
||||
Cow::Borrowed(&self.block_succs[start..end])
|
||||
}
|
||||
|
||||
fn is_ret(&self, insn: InstIx) -> bool {
|
||||
|
@ -649,7 +457,7 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
insn.get_regs(collector)
|
||||
}
|
||||
|
||||
fn map_regs(insn: &mut I, mapper: &RegUsageMapper) {
|
||||
fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
|
||||
insn.map_regs(mapper);
|
||||
}
|
||||
|
||||
|
@ -702,12 +510,11 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
|
|||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
writeln!(f, "VCode_Debug {{")?;
|
||||
writeln!(f, " Entry block: {}", self.entry)?;
|
||||
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
|
||||
|
||||
for block in 0..self.num_blocks() {
|
||||
writeln!(f, "Block {}:", block,)?;
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
writeln!(f, " (successor: Block {})", succ)?;
|
||||
writeln!(f, " (successor: Block {})", succ.get())?;
|
||||
}
|
||||
let (start, end) = self.block_ranges[block];
|
||||
writeln!(f, " (instruction range: {} .. {})", start, end)?;
|
||||
|
@ -726,52 +533,21 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
|
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
use std::fmt::Write;
|
||||
|
||||
// Calculate an order in which to display the blocks. This is the same
|
||||
// as final_block_order, but also includes blocks which are in the
|
||||
// representation but not in final_block_order.
|
||||
let mut display_order = Vec::<usize>::new();
|
||||
// First display blocks in `final_block_order`
|
||||
for bix in &self.final_block_order {
|
||||
assert!((*bix as usize) < self.num_blocks());
|
||||
display_order.push(*bix as usize);
|
||||
}
|
||||
// Now also take care of those not listed in `final_block_order`.
|
||||
// This is quadratic, but it's also debug-only code.
|
||||
for bix in 0..self.num_blocks() {
|
||||
if display_order.contains(&bix) {
|
||||
continue;
|
||||
}
|
||||
display_order.push(bix);
|
||||
}
|
||||
|
||||
let mut s = String::new();
|
||||
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
|
||||
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
|
||||
write!(
|
||||
&mut s,
|
||||
" Final block order: {:?}\n",
|
||||
self.final_block_order
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for i in 0..self.num_blocks() {
|
||||
let block = display_order[i];
|
||||
let block = i as BlockIndex;
|
||||
|
||||
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
|
||||
{
|
||||
"** OMITTED **"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
|
||||
write!(&mut s, "Block {}:\n", block).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block) {
|
||||
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
|
||||
}
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ).unwrap();
|
||||
for succ in self.succs(block) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
|
||||
}
|
||||
let (start, end) = self.block_ranges[block];
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
|
||||
for inst in start..end {
|
||||
write!(
|
||||
@ -1,52 +0,0 @@
|
|||
//! A pass that computes the number of uses of any given instruction.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir::dfg::ValueDef;
|
||||
use crate::ir::Value;
|
||||
use crate::ir::{DataFlowGraph, Function, Inst};
|
||||
|
||||
/// Auxiliary data structure that counts the number of uses of any given
|
||||
/// instruction in a Function. This is used during instruction selection
|
||||
/// to essentially do incremental DCE: when an instruction is no longer
|
||||
/// needed because its computation has been isel'd into another machine
|
||||
/// instruction at every use site, we can skip it.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NumUses {
|
||||
uses: SecondaryMap<Inst, u32>,
|
||||
}
|
||||
|
||||
impl NumUses {
|
||||
fn new() -> NumUses {
|
||||
NumUses {
|
||||
uses: SecondaryMap::with_default(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the NumUses analysis result for a function.
|
||||
pub fn compute(func: &Function) -> NumUses {
|
||||
let mut uses = NumUses::new();
|
||||
for bb in func.layout.blocks() {
|
||||
for inst in func.layout.block_insts(bb) {
|
||||
for arg in func.dfg.inst_args(inst) {
|
||||
let v = func.dfg.resolve_aliases(*arg);
|
||||
uses.add_value(&func.dfg, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
uses
|
||||
}
|
||||
|
||||
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
|
||||
match dfg.value_def(v) {
|
||||
ValueDef::Result(inst, _) => {
|
||||
self.uses[inst] += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Take the complete uses map, consuming this analysis result.
|
||||
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
|
||||
self.uses
|
||||
}
|
||||
}
|
|
@ -0,0 +1,887 @@
|
|||
//! Glue for working with `peepmatic`-generated peephole optimizers.
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{
|
||||
dfg::DataFlowGraph,
|
||||
entities::{Inst, Value},
|
||||
immediates::{Imm64, Uimm64},
|
||||
instructions::{InstructionData, Opcode},
|
||||
types, InstBuilder,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
use cranelift_codegen_shared::condcodes::IntCC;
|
||||
use peepmatic_runtime::{
|
||||
cc::ConditionCode,
|
||||
instruction_set::InstructionSet,
|
||||
operator::Operator,
|
||||
part::{Constant, Part},
|
||||
paths::Path,
|
||||
r#type::{BitWidth, Kind, Type},
|
||||
PeepholeOptimizations, PeepholeOptimizer,
|
||||
};
|
||||
use std::boxed::Box;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::ptr;
|
||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||
|
||||
/// Get the `preopt.peepmatic` peephole optimizer.
|
||||
pub(crate) fn preopt<'a, 'b>(
|
||||
isa: &'b dyn TargetIsa,
|
||||
) -> PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa> {
|
||||
static SERIALIZED: &[u8] = include_bytes!("preopt.serialized");
|
||||
|
||||
// Once initialized, this must never be re-assigned. The initialized value
|
||||
// is semantically "static data" and is intentionally leaked for the whole
|
||||
// program's lifetime.
|
||||
static DESERIALIZED: AtomicPtr<PeepholeOptimizations> = AtomicPtr::new(ptr::null_mut());
|
||||
|
||||
// If `DESERIALIZED` has already been initialized, then just use it.
|
||||
let ptr = DESERIALIZED.load(Ordering::SeqCst);
|
||||
if let Some(peep_opts) = unsafe { ptr.as_ref() } {
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// Otherwise, if `DESERIALIZED` hasn't been initialized, then we need to
|
||||
// deserialize the peephole optimizations and initialize it. However,
|
||||
// another thread could be doing the same thing concurrently, so there is a
|
||||
// race to see who initializes `DESERIALIZED` first, and we need to be
|
||||
// prepared to both win or lose that race.
|
||||
let peep_opts = PeepholeOptimizations::deserialize(SERIALIZED)
|
||||
.expect("should always be able to deserialize `preopt.serialized`");
|
||||
let peep_opts = Box::into_raw(Box::new(peep_opts));
|
||||
|
||||
// Only update `DESERIALIZED` if it is still null, attempting to perform the
|
||||
// one-time transition from null -> non-null.
|
||||
if DESERIALIZED
|
||||
.compare_and_swap(ptr::null_mut(), peep_opts, Ordering::SeqCst)
|
||||
.is_null()
|
||||
{
|
||||
// We won the race to initialize `DESERIALIZED`.
|
||||
debug_assert_eq!(DESERIALIZED.load(Ordering::SeqCst), peep_opts);
|
||||
let peep_opts = unsafe { &*peep_opts };
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// We lost the race to initialize `DESERIALIZED`. Drop our no-longer-needed
|
||||
// instance of `peep_opts` and get the pointer to the instance that won the
|
||||
// race.
|
||||
let _ = unsafe { Box::from_raw(peep_opts) };
|
||||
let peep_opts = DESERIALIZED.load(Ordering::SeqCst);
|
||||
let peep_opts = unsafe { peep_opts.as_ref().unwrap() };
|
||||
peep_opts.optimizer(isa)
|
||||
}
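The `AtomicPtr` sequence above is a lock-free, one-time initialization: whichever thread wins the null-to-non-null race leaks its allocation as the program-lifetime value, and losers free their copy and adopt the winner's. The same pattern written as a standalone sketch against ordinary `std` types, using `compare_exchange` (the non-deprecated replacement for `compare_and_swap`) rather than peepmatic's data:

use std::sync::atomic::{AtomicPtr, Ordering};

// Program-lifetime cache, initialized at most once.
static CACHE: AtomicPtr<String> = AtomicPtr::new(std::ptr::null_mut());

fn get_cache() -> &'static String {
    let ptr = CACHE.load(Ordering::SeqCst);
    if let Some(existing) = unsafe { ptr.as_ref() } {
        return existing;
    }

    // Build our candidate value and attempt the one-time null -> non-null transition.
    let candidate = Box::into_raw(Box::new(String::from("expensive-to-build data")));
    match CACHE.compare_exchange(std::ptr::null_mut(), candidate, Ordering::SeqCst, Ordering::SeqCst) {
        // We won the race: our allocation is now the leaked, shared value.
        Ok(_) => unsafe { &*candidate },
        // We lost: drop our copy and use whatever the winner installed.
        Err(winner) => {
            let _ = unsafe { Box::from_raw(candidate) };
            unsafe { &*winner }
        }
    }
}

fn main() {
    assert_eq!(get_cache(), get_cache());
}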
|
||||
|
||||
/// Either a `Value` or an `Inst`.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum ValueOrInst {
|
||||
Value(Value),
|
||||
Inst(Inst),
|
||||
}
|
||||
|
||||
impl ValueOrInst {
|
||||
/// Get the underlying `Value` if any.
|
||||
pub fn value(&self) -> Option<Value> {
|
||||
match *self {
|
||||
Self::Value(v) => Some(v),
|
||||
Self::Inst(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the underlying `Inst` if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Inst(i) => Some(i),
|
||||
Self::Value(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Value`, panicking if it is not a `Value`.
|
||||
pub fn unwrap_value(&self) -> Value {
|
||||
self.value().unwrap()
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Inst`, panicking if it is not an `Inst`.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().unwrap()
|
||||
}
|
||||
|
||||
/// Is this a `Value`?
|
||||
pub fn is_value(&self) -> bool {
|
||||
self.value().is_some()
|
||||
}
|
||||
|
||||
/// Is this an `Inst`?
|
||||
pub fn is_inst(&self) -> bool {
|
||||
self.inst().is_some()
|
||||
}
|
||||
|
||||
fn resolve_inst(&self, dfg: &DataFlowGraph) -> Option<Inst> {
|
||||
match *self {
|
||||
ValueOrInst::Inst(i) => Some(i),
|
||||
ValueOrInst::Value(v) => dfg.value_def(v).inst(),
|
||||
}
|
||||
}
|
||||
|
||||
fn result_bit_width(&self, dfg: &DataFlowGraph) -> u8 {
|
||||
match *self {
|
||||
ValueOrInst::Value(v) => dfg.value_type(v).bits().try_into().unwrap(),
|
||||
ValueOrInst::Inst(inst) => {
|
||||
let result = dfg.first_result(inst);
|
||||
dfg.value_type(result).bits().try_into().unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_constant(&self, pos: &mut FuncCursor) -> Option<Constant> {
|
||||
let inst = self.resolve_inst(&pos.func.dfg)?;
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
let x: i64 = imm.into();
|
||||
Some(Constant::Int(x as u64, width))
|
||||
}
|
||||
InstructionData::UnaryBool {
|
||||
opcode: Opcode::Bconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
Some(Constant::Bool(imm, width))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Value> for ValueOrInst {
|
||||
fn from(v: Value) -> ValueOrInst {
|
||||
ValueOrInst::Value(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Inst> for ValueOrInst {
|
||||
fn from(i: Inst) -> ValueOrInst {
|
||||
ValueOrInst::Inst(i)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the fixed bit width of `bit_width`, or if it is polymorphic, the bit
|
||||
/// width of `root`.
|
||||
fn bit_width(dfg: &DataFlowGraph, bit_width: BitWidth, root: Inst) -> u8 {
|
||||
bit_width.fixed_width().unwrap_or_else(|| {
|
||||
let tyvar = dfg.ctrl_typevar(root);
|
||||
let ty = dfg.compute_result_type(root, 0, tyvar).unwrap();
|
||||
u8::try_from(ty.bits()).unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert the constant `c` into an instruction.
|
||||
fn const_to_value<'a>(builder: impl InstBuilder<'a>, c: Constant, root: Inst) -> Value {
|
||||
match c {
|
||||
Constant::Bool(b, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
1 => types::B1,
|
||||
8 => types::B8,
|
||||
16 => types::B16,
|
||||
32 => types::B32,
|
||||
64 => types::B64,
|
||||
128 => types::B128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.bconst(ty, b)
|
||||
}
|
||||
Constant::Int(x, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
8 => types::I8,
|
||||
16 => types::I16,
|
||||
32 => types::I32,
|
||||
64 => types::I64,
|
||||
128 => types::I128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.iconst(ty, x as i64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_value(pos: &mut FuncCursor, root: Inst, part: Part<ValueOrInst>) -> Option<Value> {
|
||||
match part {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => {
|
||||
pos.func.dfg.inst_results(inst).first().copied()
|
||||
}
|
||||
Part::Instruction(ValueOrInst::Value(v)) => Some(v),
|
||||
Part::Constant(c) => Some(const_to_value(pos.ins(), c, root)),
|
||||
Part::ConditionCode(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
fn to_peepmatic_operator(&self) -> Option<Operator> {
|
||||
macro_rules! convert {
|
||||
( $( $op:ident $(,)* )* ) => {
|
||||
match self {
|
||||
$( Self::$op => Some(Operator::$op), )*
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
convert!(
|
||||
AdjustSpDown,
|
||||
AdjustSpDownImm,
|
||||
Band,
|
||||
BandImm,
|
||||
Bconst,
|
||||
Bint,
|
||||
Bor,
|
||||
BorImm,
|
||||
Brnz,
|
||||
Brz,
|
||||
Bxor,
|
||||
BxorImm,
|
||||
Iadd,
|
||||
IaddImm,
|
||||
Icmp,
|
||||
IcmpImm,
|
||||
Iconst,
|
||||
Ifcmp,
|
||||
IfcmpImm,
|
||||
Imul,
|
||||
ImulImm,
|
||||
Ireduce,
|
||||
IrsubImm,
|
||||
Ishl,
|
||||
IshlImm,
|
||||
Isub,
|
||||
Rotl,
|
||||
RotlImm,
|
||||
Rotr,
|
||||
RotrImm,
|
||||
Sdiv,
|
||||
SdivImm,
|
||||
Select,
|
||||
Sextend,
|
||||
Srem,
|
||||
SremImm,
|
||||
Sshr,
|
||||
SshrImm,
|
||||
Trapnz,
|
||||
Trapz,
|
||||
Udiv,
|
||||
UdivImm,
|
||||
Uextend,
|
||||
Urem,
|
||||
UremImm,
|
||||
Ushr,
|
||||
UshrImm,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Constant> for Imm64 {
|
||||
type Error = &'static str;
|
||||
|
||||
fn try_from(c: Constant) -> Result<Self, Self::Error> {
|
||||
match c {
|
||||
Constant::Int(x, _) => Ok(Imm64::from(x as i64)),
|
||||
Constant::Bool(..) => Err("cannot create Imm64 from Constant::Bool"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Constant> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: i64 = self.into();
|
||||
Constant::Int(x as _, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_imm64(pos: &mut FuncCursor, part: Part<ValueOrInst>) -> Imm64 {
|
||||
return match part {
|
||||
Part::Instruction(x) => match x.to_constant(pos).unwrap_or_else(|| cannot_convert()) {
|
||||
Constant::Int(x, _) => (x as i64).into(),
|
||||
Constant::Bool(..) => cannot_convert(),
|
||||
},
|
||||
Part::Constant(Constant::Int(x, _)) => (x as i64).into(),
|
||||
Part::ConditionCode(_) | Part::Constant(Constant::Bool(..)) => cannot_convert(),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn cannot_convert() -> ! {
|
||||
panic!("cannot convert part into `Imm64`")
|
||||
}
|
||||
}
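The inner `#[inline(never)] #[cold]` helper is a standard trick for keeping panic formatting out of the hot path: the common case stays small, and the failure arm is treated as unlikely. A generic, standalone version of the same idiom (names and data here are made up):

fn checked_index(data: &[u8], i: usize) -> u8 {
    return match data.get(i) {
        Some(&b) => b,
        None => out_of_range(i),
    };

    // Kept out-of-line and marked cold so the common path stays compact.
    #[inline(never)]
    #[cold]
    fn out_of_range(i: usize) -> ! {
        panic!("index {} out of range", i)
    }
}

fn main() {
    assert_eq!(checked_index(&[1, 2, 3], 1), 2);
}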
|
||||
|
||||
impl Into<Constant> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: u64 = self.into();
|
||||
Constant::Int(x, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn peepmatic_to_intcc(cc: ConditionCode) -> IntCC {
|
||||
match cc {
|
||||
ConditionCode::Eq => IntCC::Equal,
|
||||
ConditionCode::Ne => IntCC::NotEqual,
|
||||
ConditionCode::Slt => IntCC::SignedLessThan,
|
||||
ConditionCode::Sle => IntCC::SignedLessThanOrEqual,
|
||||
ConditionCode::Sgt => IntCC::SignedGreaterThan,
|
||||
ConditionCode::Sge => IntCC::SignedGreaterThanOrEqual,
|
||||
ConditionCode::Ult => IntCC::UnsignedLessThan,
|
||||
ConditionCode::Uge => IntCC::UnsignedGreaterThanOrEqual,
|
||||
ConditionCode::Ugt => IntCC::UnsignedGreaterThan,
|
||||
ConditionCode::Ule => IntCC::UnsignedLessThanOrEqual,
|
||||
ConditionCode::Of => IntCC::Overflow,
|
||||
ConditionCode::Nof => IntCC::NotOverflow,
|
||||
}
|
||||
}
|
||||
|
||||
fn intcc_to_peepmatic(cc: IntCC) -> ConditionCode {
|
||||
match cc {
|
||||
IntCC::Equal => ConditionCode::Eq,
|
||||
IntCC::NotEqual => ConditionCode::Ne,
|
||||
IntCC::SignedLessThan => ConditionCode::Slt,
|
||||
IntCC::SignedGreaterThanOrEqual => ConditionCode::Sge,
|
||||
IntCC::SignedGreaterThan => ConditionCode::Sgt,
|
||||
IntCC::SignedLessThanOrEqual => ConditionCode::Sle,
|
||||
IntCC::UnsignedLessThan => ConditionCode::Ult,
|
||||
IntCC::UnsignedGreaterThanOrEqual => ConditionCode::Uge,
|
||||
IntCC::UnsignedGreaterThan => ConditionCode::Ugt,
|
||||
IntCC::UnsignedLessThanOrEqual => ConditionCode::Ule,
|
||||
IntCC::Overflow => ConditionCode::Of,
|
||||
IntCC::NotOverflow => ConditionCode::Nof,
|
||||
}
|
||||
}
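The two mappings are meant to be exact inverses (the corrected `Sle`/`Sge` arms above follow the name-for-name correspondence). A quick round-trip check one could add as a test, assuming `IntCC` implements `Copy`, `PartialEq`, and `Debug` as it does in cranelift-codegen, and covering only the variants handled here:

#[cfg(test)]
mod cc_roundtrip_tests {
    use super::*;

    #[test]
    fn intcc_peepmatic_roundtrip() {
        use IntCC::*;
        let all = [
            Equal, NotEqual,
            SignedLessThan, SignedLessThanOrEqual,
            SignedGreaterThan, SignedGreaterThanOrEqual,
            UnsignedLessThan, UnsignedLessThanOrEqual,
            UnsignedGreaterThan, UnsignedGreaterThanOrEqual,
            Overflow, NotOverflow,
        ];
        for &cc in &all {
            // Converting to peepmatic's ConditionCode and back must be lossless.
            assert_eq!(peepmatic_to_intcc(intcc_to_peepmatic(cc)), cc);
        }
    }
}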
|
||||
|
||||
fn get_immediate(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Part<ValueOrInst> {
|
||||
return match dfg[inst] {
|
||||
InstructionData::BinaryImm64 { imm, .. } if i == 0 => imm.into(),
|
||||
InstructionData::BranchIcmp { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::BranchInt { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompare { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { imm, .. } if i == 1 => imm.into(),
|
||||
InstructionData::IntCond { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCondTrap { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntSelect { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::UnaryBool { imm, .. } if i == 0 => {
|
||||
Constant::Bool(imm, BitWidth::Polymorphic).into()
|
||||
}
|
||||
InstructionData::UnaryImm { imm, .. } if i == 0 => imm.into(),
|
||||
ref otherwise => unsupported(otherwise),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn unsupported(data: &InstructionData) -> ! {
|
||||
panic!("unsupported instruction data: {:?}", data)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_argument(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Option<Value> {
|
||||
dfg.inst_args(inst).get(i).copied()
|
||||
}
|
||||
|
||||
fn peepmatic_ty_to_ir_ty(ty: Type, dfg: &DataFlowGraph, root: Inst) -> types::Type {
|
||||
match (ty.kind, bit_width(dfg, ty.bit_width, root)) {
|
||||
(Kind::Int, 8) => types::I8,
|
||||
(Kind::Int, 16) => types::I16,
|
||||
(Kind::Int, 32) => types::I32,
|
||||
(Kind::Int, 64) => types::I64,
|
||||
(Kind::Int, 128) => types::I128,
|
||||
(Kind::Bool, 1) => types::B1,
|
||||
(Kind::Bool, 8) => types::I8,
|
||||
(Kind::Bool, 16) => types::I16,
|
||||
(Kind::Bool, 32) => types::I32,
|
||||
(Kind::Bool, 64) => types::I64,
|
||||
(Kind::Bool, 128) => types::I128,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// NB: the unsafe contract we must uphold here is that our implementation of
|
||||
// `instruction_result_bit_width` must always return a valid, non-zero bit
|
||||
// width.
|
||||
unsafe impl<'a, 'b> InstructionSet<'b> for &'a dyn TargetIsa {
|
||||
type Context = FuncCursor<'b>;
|
||||
|
||||
type Instruction = ValueOrInst;
|
||||
|
||||
fn replace_instruction(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
old: ValueOrInst,
|
||||
new: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("replace {:?} with {:?}", old, new);
|
||||
let old_inst = old.resolve_inst(&pos.func.dfg).unwrap();
|
||||
|
||||
// Try to convert `new` to an instruction, because we prefer replacing
|
||||
// an old instruction with a new one wholesale. However, if the
|
||||
// replacement cannot be converted to an instruction (e.g. the
|
||||
// right-hand side is a block/function parameter value) then we change
|
||||
// the old instruction's result to an alias of the new value.
|
||||
let new_inst = match new {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => Some(inst),
|
||||
Part::Instruction(ValueOrInst::Value(_)) => {
|
||||
// Do not try and follow the value definition. If we transplant
|
||||
// this value's instruction, and there are other uses of this
|
||||
// value, then we could mess up ordering between instructions.
|
||||
None
|
||||
}
|
||||
Part::Constant(c) => {
|
||||
let v = const_to_value(pos.ins(), c, old_inst);
|
||||
let inst = pos.func.dfg.value_def(v).unwrap_inst();
|
||||
Some(inst)
|
||||
}
|
||||
Part::ConditionCode(_) => None,
|
||||
};
|
||||
|
||||
match new_inst {
|
||||
Some(new_inst) => {
|
||||
pos.func.transplant_inst(old_inst, new_inst);
|
||||
debug_assert_eq!(pos.current_inst(), Some(old_inst));
|
||||
old_inst.into()
|
||||
}
|
||||
None => {
|
||||
let new_value = part_to_value(pos, old_inst, new).unwrap();
|
||||
|
||||
let old_results = pos.func.dfg.detach_results(old_inst);
|
||||
let old_results = old_results.as_slice(&pos.func.dfg.value_lists);
|
||||
assert_eq!(old_results.len(), 1);
|
||||
let old_value = old_results[0];
|
||||
|
||||
pos.func.dfg.change_to_alias(old_value, new_value);
|
||||
pos.func.dfg.replace(old_inst).nop();
|
||||
|
||||
new_value.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_part_at_path(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
path: Path,
|
||||
) -> Option<Part<ValueOrInst>> {
|
||||
// The root is path [0].
|
||||
debug_assert!(!path.0.is_empty());
|
||||
debug_assert_eq!(path.0[0], 0);
|
||||
|
||||
let mut part = Part::Instruction(root);
|
||||
for p in path.0[1..].iter().copied() {
|
||||
let inst = part.as_instruction()?.resolve_inst(&pos.func.dfg)?;
|
||||
let operator = pos.func.dfg[inst].opcode().to_peepmatic_operator()?;
|
||||
|
||||
if p < operator.immediates_arity() {
|
||||
part = get_immediate(&pos.func.dfg, inst, p as usize);
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg = p - operator.immediates_arity();
|
||||
let arg = arg as usize;
|
||||
let value = get_argument(&pos.func.dfg, inst, arg)?;
|
||||
part = Part::Instruction(value.into());
|
||||
}
|
||||
|
||||
log::trace!("get_part_at_path({:?}) = {:?}", path, part);
|
||||
Some(part)
|
||||
}
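The loop encodes a convention: a path element first indexes an operator's immediates and only then its value arguments, which is why `p` is compared against `immediates_arity()` before the subtraction. A toy model of that split, with invented arities and data rather than real `InstructionData`:

// Illustrative only: `imms.len()` plays the role of operator.immediates_arity().
#[derive(Debug, PartialEq)]
enum Picked {
    Immediate(i64),
    Argument(&'static str),
}

fn pick(imms: &[i64], args: &[&'static str], p: u8) -> Picked {
    let imm_arity = imms.len() as u8;
    if p < imm_arity {
        Picked::Immediate(imms[p as usize])
    } else {
        Picked::Argument(args[(p - imm_arity) as usize])
    }
}

fn main() {
    // Think of icmp_imm: its immediates (cond, imm) come first, then the value arg.
    let imms = [0, 7];
    let args = ["v1"];
    assert_eq!(pick(&imms, &args, 1), Picked::Immediate(7));
    assert_eq!(pick(&imms, &args, 2), Picked::Argument("v1"));
}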
|
||||
|
||||
fn operator(&self, pos: &mut FuncCursor<'b>, value_or_inst: ValueOrInst) -> Option<Operator> {
|
||||
let inst = value_or_inst.resolve_inst(&pos.func.dfg)?;
|
||||
pos.func.dfg[inst].opcode().to_peepmatic_operator()
|
||||
}
|
||||
|
||||
fn make_inst_1(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
r#type: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_1: {:?}({:?})", operator, a);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::AdjustSpDown => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
pos.ins().adjust_sp_down(a).into()
|
||||
}
|
||||
Operator::AdjustSpDownImm => {
|
||||
let c = a.unwrap_constant();
|
||||
let imm = Imm64::try_from(c).unwrap();
|
||||
pos.ins().adjust_sp_down_imm(imm).into()
|
||||
}
|
||||
Operator::Bconst => {
|
||||
let c = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), c, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bint => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().bint(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Brnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: branching instructions must be the root of an
|
||||
// optimization's right-hand side, so we get the destination
|
||||
// block and arguments from the left-hand side's root. Peepmatic
|
||||
// doesn't currently represent labels or varargs.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brnz(a, block, &args).into()
|
||||
}
|
||||
Operator::Brz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// See the comment in the `Operator::Brnz` match arm.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brz(a, block, &args).into()
|
||||
}
|
||||
Operator::Iconst => {
|
||||
let a = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), a, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ireduce => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().ireduce(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().sextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Trapnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: similar to branching instructions (see comment in the
|
||||
// `Operator::Brnz` match arm) trapping instructions must be the
|
||||
// root of an optimization's right-hand side, and we get the
|
||||
// trap code from the root of the left-hand side. Peepmatic
|
||||
// doesn't currently represent trap codes.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
|
||||
pos.ins().trapnz(a, code).into()
|
||||
}
|
||||
Operator::Trapz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
// See comment in the `Operator::Trapnz` match arm.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
pos.ins().trapz(a, code).into()
|
||||
}
|
||||
Operator::Uextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().uextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_2(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_2: {:?}({:?}, {:?})", operator, a, b);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Band => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BandImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bxor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BxorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Iadd => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IaddImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ifcmp => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IfcmpImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Imul => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::ImulImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IrsubImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().irsub_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ishl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IshlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Isub => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().isub(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sdiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Srem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sshr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Udiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Urem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ushr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_3(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
c: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_3: {:?}({:?}, {:?}, {:?})", operator, a, b, c);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Icmp => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp(cond, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IcmpImm => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let imm = part_to_imm64(pos, b);
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp_imm(cond, c, imm);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Select => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().select(a, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn instruction_to_constant(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> Option<Constant> {
|
||||
value_or_inst.to_constant(pos)
|
||||
}
|
||||
|
||||
fn instruction_result_bit_width(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> u8 {
|
||||
value_or_inst.result_bit_width(&pos.func.dfg)
|
||||
}
|
||||
|
||||
fn native_word_size_in_bits(&self, _pos: &mut FuncCursor<'b>) -> u8 {
|
||||
self.pointer_bits()
|
||||
}
|
||||
}
|
|
@ -271,6 +271,42 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
.replace(inst)
|
||||
.sload32_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Store => {
|
||||
pos.func.dfg.replace(inst).store_complex(
|
||||
info.flags,
|
||||
|
@ -305,7 +341,7 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
}
|
||||
_ => panic!("Unsupported load or store opcode"),
|
||||
},
|
||||
InstructionData::BinaryImm {
|
||||
InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg,
|
||||
imm,
|
||||
|
|
|
@ -0,0 +1,193 @@
|
|||
;; Apply basic simplifications.
|
||||
;;
|
||||
;; This folds constants with arithmetic to form `_imm` instructions, and other
|
||||
;; minor simplifications.
|
||||
;;
|
||||
;; Doesn't apply some simplifications if the native word width (in bytes) is
|
||||
;; smaller than the controlling type's width of the instruction. This would
|
||||
;; result in an illegal instruction that would likely be expanded back into an
|
||||
;; instruction on smaller types with the same initial opcode, creating
|
||||
;; unnecessary churn.
|
||||
|
||||
;; Binary instructions whose second argument is constant.
|
||||
(=> (when (iadd $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (sdiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sdiv_imm $C $x))
|
||||
(=> (when (udiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(udiv_imm $C $x))
|
||||
(=> (when (srem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(srem_imm $C $x))
|
||||
(=> (when (urem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(urem_imm $C $x))
|
||||
(=> (when (band $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (rotl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotl_imm $C $x))
|
||||
(=> (when (rotr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotr_imm $C $x))
|
||||
(=> (when (ishl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ishl_imm $C $x))
|
||||
(=> (when (ushr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ushr_imm $C $x))
|
||||
(=> (when (sshr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sshr_imm $C $x))
|
||||
(=> (when (isub $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $(neg $C) $x))
|
||||
(=> (when (ifcmp $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ifcmp_imm $C $x))
|
||||
(=> (when (icmp $cond $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(icmp_imm $cond $C $x))
|
||||
|
||||
;; Binary instructions whose first operand is constant.
|
||||
(=> (when (iadd $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (band $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (isub $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(irsub_imm $C $x))
|
||||
|
||||
;; Unary instructions whose operand is constant.
|
||||
(=> (adjust_sp_down $C) (adjust_sp_down_imm $C))
|
||||
|
||||
;; Fold `(binop_imm $C1 (binop_imm $C2 $x))` into `(binop_imm $(binop $C2 $C1) $x)`.
|
||||
(=> (iadd_imm $C1 (iadd_imm $C2 $x)) (iadd_imm $(iadd $C1 $C2) $x))
|
||||
(=> (imul_imm $C1 (imul_imm $C2 $x)) (imul_imm $(imul $C1 $C2) $x))
|
||||
(=> (bor_imm $C1 (bor_imm $C2 $x)) (bor_imm $(bor $C1 $C2) $x))
|
||||
(=> (band_imm $C1 (band_imm $C2 $x)) (band_imm $(band $C1 $C2) $x))
|
||||
(=> (bxor_imm $C1 (bxor_imm $C2 $x)) (bxor_imm $(bxor $C1 $C2) $x))
|
||||
|
||||
;; Remove operations that are no-ops.
|
||||
(=> (iadd_imm 0 $x) $x)
|
||||
(=> (imul_imm 1 $x) $x)
|
||||
(=> (sdiv_imm 1 $x) $x)
|
||||
(=> (udiv_imm 1 $x) $x)
|
||||
(=> (bor_imm 0 $x) $x)
|
||||
(=> (band_imm -1 $x) $x)
|
||||
(=> (bxor_imm 0 $x) $x)
|
||||
(=> (rotl_imm 0 $x) $x)
|
||||
(=> (rotr_imm 0 $x) $x)
|
||||
(=> (ishl_imm 0 $x) $x)
|
||||
(=> (ushr_imm 0 $x) $x)
|
||||
(=> (sshr_imm 0 $x) $x)
|
||||
|
||||
;; Replace with zero.
|
||||
(=> (imul_imm 0 $x) 0)
|
||||
(=> (band_imm 0 $x) 0)
|
||||
|
||||
;; Replace with negative 1.
|
||||
(=> (bor_imm -1 $x) -1)
|
||||
|
||||
;; Transform `[(x << N) >> N]` into a (un)signed-extending move.
|
||||
;;
|
||||
;; i16 -> i8 -> i16
|
||||
(=> (when (ushr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(uextend{i16} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(sextend{i16} (ireduce{i8} $x)))
|
||||
;; i32 -> i8 -> i32
|
||||
(=> (when (ushr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i8} $x)))
|
||||
;; i32 -> i16 -> i32
|
||||
(=> (when (ushr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i16} $x)))
|
||||
;; i64 -> i8 -> i64
|
||||
(=> (when (ushr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i8} $x)))
|
||||
;; i64 -> i16 -> i64
|
||||
(=> (when (ushr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i16} $x)))
|
||||
;; i64 -> i32 -> i64
|
||||
(=> (when (ushr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i32} $x)))
|
||||
(=> (when (sshr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i32} $x)))
|
||||
|
||||
;; Fold away redundant `bint` instructions that accept both integer and boolean
|
||||
;; arguments.
|
||||
(=> (select (bint $x) $y $z) (select $x $y $z))
|
||||
(=> (brz (bint $x)) (brz $x))
|
||||
(=> (brnz (bint $x)) (brnz $x))
|
||||
(=> (trapz (bint $x)) (trapz $x))
|
||||
(=> (trapnz (bint $x)) (trapnz $x))
|
||||
|
||||
;; Fold comparisons into branch operations when possible.
|
||||
;;
|
||||
;; This matches against operations which compare against zero, then use the
|
||||
;; result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
;; single `brz` or `brnz`.
|
||||
(=> (brnz (icmp_imm ne 0 $x)) (brnz $x))
|
||||
(=> (brz (icmp_imm ne 0 $x)) (brz $x))
|
||||
(=> (brnz (icmp_imm eq 0 $x)) (brz $x))
|
||||
(=> (brz (icmp_imm eq 0 $x)) (brnz $x))
|
||||
|
||||
;; Division and remainder by constants.
|
||||
;;
|
||||
;; TODO: this section is incomplete, and a bunch of related optimizations are
|
||||
;; still hand-coded in `simple_preopt.rs`.
|
||||
|
||||
;; (Division by one is handled above.)
|
||||
|
||||
;; Remainder by one is zero.
|
||||
(=> (urem_imm 1 $x) 0)
|
||||
(=> (srem_imm 1 $x) 0)
|
||||
|
||||
;; Division by a power of two -> shift right.
|
||||
(=> (when (udiv_imm $C $x)
|
||||
(is-power-of-two $C))
|
||||
(ushr_imm $(log2 $C) $x))
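;; A worked example (a sketch, not part of the original rule file): with
;; $C = 8, the `is-power-of-two` precondition holds and $(log2 $C) is 3, so
;; the rule above rewrites (udiv_imm 8 $x) into (ushr_imm 3 $x).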
|
Binary file not shown.
|
@ -0,0 +1,393 @@
|
|||
//! A Constant-Phi-Node removal pass.
|
||||
|
||||
use log::info;
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::EntityList;
|
||||
use crate::fx::FxHashMap;
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::Function;
|
||||
use crate::ir::{Block, Inst, Value};
|
||||
use crate::timing;
|
||||
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::vec::Vec;
|
||||
|
||||
// A note on notation. For the sake of clarity, this file uses the phrase
|
||||
// "formal parameters" to mean the `Value`s listed in the block head, and
|
||||
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
|
||||
//
|
||||
// block4(v16: i32, v18: i32): <-- formal parameters
|
||||
// ...
|
||||
// brnz v27, block7(v22, v24) <-- actual parameters
|
||||
// jump block6
|
||||
|
||||
// This transformation pass (conceptually) partitions all values in the
|
||||
// function into two groups:
|
||||
//
|
||||
// * Group A: values defined by block formal parameters, except for the entry block.
|
||||
//
|
||||
// * Group B: All other values: that is, values defined by instructions,
|
||||
// and the formals of the entry block.
|
||||
//
|
||||
// For each value in Group A, it attempts to establish whether it will have
|
||||
// the value of exactly one member of Group B. If so, the formal parameter is
|
||||
// deleted, all corresponding actual parameters (in jumps/branches to the
|
||||
// defining block) are deleted, and a rename is inserted.
|
||||
//
|
||||
// The entry block is special-cased because (1) we don't know what values flow
|
||||
// to its formals and (2) in any case we can't change its formals.
|
||||
//
|
||||
// Work proceeds in three phases.
|
||||
//
|
||||
// * Phase 1: examine all instructions. For each block, make up a useful
|
||||
// grab-bag of information, `BlockSummary`, that summarises the block's
|
||||
// formals and jump/branch instruction. This is used by Phases 2 and 3.
|
||||
//
|
||||
// * Phase 2: for each value in Group A, try to find a single Group B value
|
||||
// that flows to it. This is done using a classical iterative forward
|
||||
// dataflow analysis over a simple constant-propagation style lattice. It
|
||||
// converges quickly in practice -- I have seen at most 4 iterations. This
|
||||
// is relatively cheap because the iteration is done over the
|
||||
// `BlockSummary`s, and does not visit each instruction. The resulting
|
||||
// fixed point is stored in a `SolverState`.
|
||||
//
|
||||
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
|
||||
// remove redundant formals and actuals, and to insert suitable renames.
|
||||
//
|
||||
// Note that the effectiveness of the analysis depends on the fact that
|
||||
// there are no copy instructions in Cranelift's IR. If there were, the
|
||||
// computation of `actual_absval` in Phase 2 would have to be extended to
|
||||
// chase through such copies.
|
||||
//
|
||||
// For large functions, the analysis cost using the new AArch64 backend is about
|
||||
// 0.6% of the non-optimising compile time, as measured by instruction counts.
|
||||
// This transformation usually pays for itself several times over, though, by
|
||||
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
|
||||
// seen for large functions.
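//
// A small worked example (a sketch, not part of the original patch), reusing
// the notation above: if every branch or jump to block4 passes the same
// Group-B value v10 in the position of formal v16, Phase 2 computes One(v10)
// for v16, and Phase 3 then deletes v16 from block4's formals, drops the
// matching actual at every branch/jump site, and makes v16 an alias of v10.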
|
||||
|
||||
// The `Value`s (Group B) that can flow to a formal parameter (Group A).
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
enum AbstractValue {
|
||||
// Two or more values flow to this formal.
|
||||
Many,
|
||||
// Exactly one value, as stated, flows to this formal. The `Value`s that
|
||||
// can appear here are exactly: `Value`s defined by `Inst`s, plus the
|
||||
// `Value`s defined by the formals of the entry block. Note that this is
|
||||
// exactly the set of `Value`s that are *not* tracked in the solver below
|
||||
// (see `SolverState`).
|
||||
One(Value /*Group B*/),
|
||||
// No value flows to this formal.
|
||||
None,
|
||||
}
|
||||
|
||||
impl AbstractValue {
|
||||
fn join(self, other: AbstractValue) -> AbstractValue {
|
||||
match (self, other) {
|
||||
// Joining with `None` has no effect
|
||||
(AbstractValue::None, p2) => p2,
|
||||
(p1, AbstractValue::None) => p1,
|
||||
// Joining with `Many` produces `Many`
|
||||
(AbstractValue::Many, _p2) => AbstractValue::Many,
|
||||
(_p1, AbstractValue::Many) => AbstractValue::Many,
|
||||
// The only interesting case
|
||||
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
|
||||
if v1 == v2 {
|
||||
AbstractValue::One(v1)
|
||||
} else {
|
||||
AbstractValue::Many
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn is_one(self) -> bool {
|
||||
if let AbstractValue::One(_) = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
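
// A minimal illustration (a sketch, not part of the original patch) of the
// lattice implemented by `join` above, for two distinct Group-B values
// v1 != v2:
//
//   None.join(One(v1))    == One(v1)   -- None is the identity element
//   One(v1).join(One(v1)) == One(v1)   -- agreeing inputs stay One
//   One(v1).join(One(v2)) == Many      -- disagreeing inputs collapse to Many
//   Many.join(x)          == Many      -- Many absorbs everything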
|
||||
|
||||
// For some block, a useful bundle of info. The `Block` itself is not stored
|
||||
// here since it will be the key in the associated `FxHashMap` -- see
|
||||
// `summaries` below. For the `SmallVec` tuning params: most blocks have
|
||||
// few parameters, hence `4`. And almost all blocks have either one or two
|
||||
// successors, hence `2`.
|
||||
#[derive(Debug)]
|
||||
struct BlockSummary {
|
||||
// Formal parameters for this `Block`
|
||||
formals: SmallVec<[Value; 4] /*Group A*/>,
|
||||
// For each `Inst` in this block that transfers to another block: the
|
||||
// `Inst` itself, the destination `Block`, and the actual parameters
|
||||
// passed. We don't bother to include transfers that pass zero parameters
|
||||
// since that makes more work for the solver for no purpose.
|
||||
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
|
||||
}
|
||||
impl BlockSummary {
|
||||
fn new(formals: SmallVec<[Value; 4]>) -> Self {
|
||||
Self {
|
||||
formals,
|
||||
dests: smallvec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Solver state. This holds an AbstractValue for each formal parameter, except
|
||||
// for those from the entry block.
|
||||
struct SolverState {
|
||||
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
|
||||
}
|
||||
impl SolverState {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
absvals: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
fn get(&self, actual: Value) -> AbstractValue {
|
||||
match self.absvals.get(&actual) {
|
||||
Some(lp) => *lp,
|
||||
None => panic!("SolverState::get: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
|
||||
self.absvals.get(&actual)
|
||||
}
|
||||
fn set(&mut self, actual: Value, lp: AbstractValue) {
|
||||
match self.absvals.insert(actual, lp) {
|
||||
Some(_old_lp) => {}
|
||||
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect phis in `func` that will only ever produce one value, using a
|
||||
/// classic forward dataflow analysis. Then remove them.
|
||||
#[inline(never)]
|
||||
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
|
||||
let _tt = timing::remove_constant_phis();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
// Get the blocks, in reverse postorder
|
||||
let mut blocks_reverse_postorder = Vec::<Block>::new();
|
||||
for block in domtree.cfg_postorder() {
|
||||
blocks_reverse_postorder.push(*block);
|
||||
}
|
||||
blocks_reverse_postorder.reverse();
|
||||
|
||||
// Phase 1 of 3: for each block, make a summary containing all relevant
|
||||
// info. The solver will iterate over the summaries, rather than having
|
||||
// to inspect each instruction in each block.
|
||||
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
let formals = func.dfg.block_params(*b);
|
||||
let mut summary = BlockSummary::new(SmallVec::from(formals));
|
||||
|
||||
for inst in func.layout.block_insts(*b) {
|
||||
let idetails = &func.dfg[inst];
|
||||
// Note that multi-dest transfers (i.e., branch tables) don't
|
||||
// carry parameters in our IR, so we only have to care about
|
||||
// `SingleDest` here.
|
||||
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
|
||||
{
|
||||
let inst_var_args = func.dfg.inst_variable_args(inst);
|
||||
// Skip branches/jumps that carry no params.
|
||||
if inst_var_args.len() > 0 {
|
||||
let mut actuals = SmallVec::<[Value; 4]>::new();
|
||||
for arg in inst_var_args {
|
||||
let arg = func.dfg.resolve_aliases(*arg);
|
||||
actuals.push(arg);
|
||||
}
|
||||
summary.dests.push((inst, dest, actuals));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the invariant that all blocks (except for the entry) appear
|
||||
// in the summary, *unless* they have neither formals nor any
|
||||
// param-carrying branches/jumps.
|
||||
if formals.len() > 0 || summary.dests.len() > 0 {
|
||||
summaries.insert(*b, summary);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2 of 3: iterate over the summaries in reverse postorder,
|
||||
// computing new `AbstractValue`s for each tracked `Value`. The set of
|
||||
// tracked `Value`s is exactly Group A as described above.
|
||||
|
||||
let entry_block = func
|
||||
.layout
|
||||
.entry_block()
|
||||
.expect("remove_constant_phis: entry block unknown");
|
||||
|
||||
// Set up initial solver state
|
||||
let mut state = SolverState::new();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
// For each block, get the formals
|
||||
if *b == entry_block {
|
||||
continue;
|
||||
}
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
|
||||
assert!(mb_old_absval.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
// Solve: repeatedly traverse the blocks in reverse postorder, until there
|
||||
// are no changes.
|
||||
let mut iter_no = 0;
|
||||
loop {
|
||||
iter_no += 1;
|
||||
let mut changed = false;
|
||||
|
||||
for src in &blocks_reverse_postorder {
|
||||
let mb_src_summary = summaries.get(src);
|
||||
// The src block might have no summary. This means it has no
|
||||
// branches/jumps that carry parameters *and* it doesn't take any
|
||||
// parameters itself. Phase 1 ensures this. So we can ignore it.
|
||||
if mb_src_summary.is_none() {
|
||||
continue;
|
||||
}
|
||||
let src_summary = mb_src_summary.unwrap();
|
||||
for (_inst, dst, src_actuals) in &src_summary.dests {
|
||||
assert!(*dst != entry_block);
|
||||
// By contrast, the dst block must have a summary. Phase 1
|
||||
// will have only included an entry in `src_summary.dests` if
|
||||
// that branch/jump carried at least one parameter. So the
|
||||
// dst block does take parameters, so it must have a summary.
|
||||
let dst_summary = summaries
|
||||
.get(dst)
|
||||
.expect("remove_constant_phis: dst block has no summary");
|
||||
let dst_formals = &dst_summary.formals;
|
||||
assert!(src_actuals.len() == dst_formals.len());
|
||||
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
|
||||
// Find the abstract value for `actual`. If it is a block
|
||||
// formal parameter then the most recent abstract value is
|
||||
// to be found in the solver state. If not, then it's a
|
||||
// real value defining point (not a phi), in which case
|
||||
// return it itself.
|
||||
let actual_absval = match state.maybe_get(*actual) {
|
||||
Some(pt) => *pt,
|
||||
None => AbstractValue::One(*actual),
|
||||
};
|
||||
|
||||
// And `join` the new value with the old.
|
||||
let formal_absval_old = state.get(*formal);
|
||||
let formal_absval_new = formal_absval_old.join(actual_absval);
|
||||
if formal_absval_new != formal_absval_old {
|
||||
changed = true;
|
||||
state.set(*formal, formal_absval_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !changed {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let mut n_consts = 0;
|
||||
for absval in state.absvals.values() {
|
||||
if absval.is_one() {
|
||||
n_consts += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3 of 3: edit the function to remove constant formals, using the
|
||||
// summaries and the final solver state as a guide.
|
||||
|
||||
// Make up a set of blocks that need editing.
|
||||
let mut need_editing = FxHashSet::<Block>::default();
|
||||
for (block, summary) in &summaries {
|
||||
if *block == entry_block {
|
||||
continue;
|
||||
}
|
||||
for formal in &summary.formals {
|
||||
let formal_absval = state.get(*formal);
|
||||
if formal_absval.is_one() {
|
||||
need_editing.insert(*block);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Firstly, deal with the formals. For each formal which is redundant,
|
||||
// remove it, and also add a reroute from it to the constant value which
|
||||
// we know it to be.
|
||||
for b in &need_editing {
|
||||
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
// The state must give an absval for `formal`.
|
||||
if let AbstractValue::One(replacement_val) = state.get(*formal) {
|
||||
del_these.push((*formal, replacement_val));
|
||||
}
|
||||
}
|
||||
// We can delete the formals in any order. However,
|
||||
// `remove_block_param` works by sliding backwards all arguments to
|
||||
// the right of the one it is asked to delete. Hence when removing more
|
||||
// than one formal, it is significantly more efficient to ask it to
|
||||
// remove the rightmost formal first, and hence this `reverse`.
|
||||
del_these.reverse();
|
||||
for (redundant_formal, replacement_val) in del_these {
|
||||
func.dfg.remove_block_param(redundant_formal);
|
||||
func.dfg.change_to_alias(redundant_formal, replacement_val);
|
||||
}
|
||||
}
|
||||
|
||||
// Secondly, visit all branch insns. If the destination has had its
|
||||
// formals changed, change the actuals accordingly. Don't scan all insns,
|
||||
// rather just visit those as listed in the summaries we prepared earlier.
|
||||
for (_src_block, summary) in &summaries {
|
||||
for (inst, dst_block, _src_actuals) in &summary.dests {
|
||||
if !need_editing.contains(dst_block) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
|
||||
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
|
||||
let num_fixed_actuals = func.dfg[*inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let dst_summary = summaries.get(&dst_block).unwrap();
|
||||
|
||||
// Check that the numbers of arguments make sense.
|
||||
assert!(num_fixed_actuals <= num_old_actuals);
|
||||
assert!(num_fixed_actuals + dst_summary.formals.len() == num_old_actuals);
|
||||
|
||||
// Create a new value list.
|
||||
let mut new_actuals = EntityList::<Value>::new();
|
||||
// Copy the fixed args to the new list
|
||||
for i in 0..num_fixed_actuals {
|
||||
let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
|
||||
new_actuals.push(val, &mut func.dfg.value_lists);
|
||||
}
|
||||
|
||||
// Copy the variable args (the actual block params) to the new
|
||||
// list, filtering out redundant ones.
|
||||
for i in 0..dst_summary.formals.len() {
|
||||
let actual_i = old_actuals
|
||||
.get(num_fixed_actuals + i, &func.dfg.value_lists)
|
||||
.unwrap();
|
||||
let formal_i = dst_summary.formals[i];
|
||||
let is_redundant = state.get(formal_i).is_one();
|
||||
if !is_redundant {
|
||||
new_actuals.push(actual_i, &mut func.dfg.value_lists);
|
||||
}
|
||||
}
|
||||
func.dfg[*inst].put_value_list(new_actuals);
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
|
||||
iter_no,
|
||||
state.absvals.len(),
|
||||
n_consts
|
||||
);
|
||||
}
|
|
@ -10,10 +10,8 @@ use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
|||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, IntCC},
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{I16, I32, I64, I8},
|
||||
instructions::Opcode,
|
||||
types::{I32, I64},
|
||||
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
|
@ -144,7 +142,7 @@ fn package_up_divrem_info(
|
|||
/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
|
||||
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
|
||||
if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
|
||||
let (is_signed, is_rem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
|
@ -468,340 +466,6 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode && ty == pos.func.dfg.ctrl_typevar(arg_inst) {
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
|
||||
match (opcode, imm.into()) {
|
||||
(Opcode::IaddImm, 0)
|
||||
| (Opcode::ImulImm, 1)
|
||||
| (Opcode::SdivImm, 1)
|
||||
| (Opcode::UdivImm, 1)
|
||||
| (Opcode::BorImm, 0)
|
||||
| (Opcode::BandImm, -1)
|
||||
| (Opcode::BxorImm, 0)
|
||||
| (Opcode::RotlImm, 0)
|
||||
| (Opcode::RotrImm, 0)
|
||||
| (Opcode::IshlImm, 0)
|
||||
| (Opcode::UshrImm, 0)
|
||||
| (Opcode::SshrImm, 0) => {
|
||||
// Alias the result value with the original argument.
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
|
||||
}
|
||||
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
|
||||
// Replace by zero.
|
||||
pos.func.dfg.replace(inst).iconst(ty, 0);
|
||||
}
|
||||
(Opcode::BorImm, -1) => {
|
||||
// Replace by minus one.
|
||||
pos.func.dfg.replace(inst).iconst(ty, -1);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::IntCompare { opcode, cond, args } => {
|
||||
debug_assert_eq!(opcode, Opcode::Icmp);
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
|
||||
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::CondTrap { .. }
|
||||
| InstructionData::Branch { .. }
|
||||
| InstructionData::Ternary {
|
||||
opcode: Opcode::Select,
|
||||
..
|
||||
} => {
|
||||
// Fold away a redundant `bint`.
|
||||
let condition_def = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
pos.func.dfg.value_def(args[0])
|
||||
};
|
||||
if let ValueDef::Result(def_inst, _) = condition_def {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Bint,
|
||||
arg: bool_val,
|
||||
} = pos.func.dfg[def_inst]
|
||||
{
|
||||
let args = pos.func.dfg.inst_args_mut(inst);
|
||||
args[0] = bool_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
struct BranchOptInfo {
|
||||
br_inst: Inst,
|
||||
cmp_arg: Value,
|
||||
args: ValueList,
|
||||
new_opcode: Opcode,
|
||||
}
|
||||
|
||||
/// Fold comparisons into branch operations when possible.
|
||||
///
|
||||
/// This matches against operations which compare against zero, then use the
|
||||
/// result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
/// single `brz` or `brnz`.
|
||||
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
|
||||
let mut info = if let InstructionData::Branch {
|
||||
opcode: br_opcode,
|
||||
args: ref br_args,
|
||||
..
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let first_arg = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
args[0]
|
||||
};
|
||||
|
||||
let icmp_inst = if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
icmp_inst
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let InstructionData::IntCompareImm {
|
||||
opcode: Opcode::IcmpImm,
|
||||
arg: cmp_arg,
|
||||
cond: cmp_cond,
|
||||
imm: cmp_imm,
|
||||
} = pos.func.dfg[icmp_inst]
|
||||
{
|
||||
let cmp_imm: i64 = cmp_imm.into();
|
||||
if cmp_imm != 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// icmp_imm returns non-zero when the comparison is true. So, if
|
||||
// we're branching on zero, we need to invert the condition.
|
||||
let cond = match br_opcode {
|
||||
Opcode::Brz => cmp_cond.inverse(),
|
||||
Opcode::Brnz => cmp_cond,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let new_opcode = match cond {
|
||||
IntCC::Equal => Opcode::Brz,
|
||||
IntCC::NotEqual => Opcode::Brnz,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
BranchOptInfo {
|
||||
br_inst: inst,
|
||||
cmp_arg,
|
||||
args: br_args.clone(),
|
||||
new_opcode,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
|
||||
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
|
||||
*opcode = info.new_opcode;
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
|
||||
enum BranchOrderKind {
|
||||
BrzToBrnz(Value),
|
||||
BrnzToBrz(Value),
|
||||
|
@ -944,15 +608,490 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block,
|
|||
cfg.recompute_block(pos.func, block);
|
||||
}
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::peepmatic::ValueOrInst;
|
||||
|
||||
pub type PeepholeOptimizer<'a, 'b> =
|
||||
peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
crate::peepmatic::preopt(isa)
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
_native_word_width: u32,
|
||||
) {
|
||||
// After we apply one optimization, that might make another
|
||||
// optimization applicable. Keep running the peephole optimizer
|
||||
// until either:
|
||||
//
|
||||
// * No optimization applied, and therefore it doesn't make sense to
|
||||
// try again, because no optimization will apply again.
|
||||
//
|
||||
// * Or when we replaced an instruction with an alias to an existing
|
||||
// value, because we already ran the peephole optimizer over the
|
||||
// aliased value's instruction in an early part of the traversal
|
||||
// over the function.
|
||||
while let Some(ValueOrInst::Inst(new_inst)) =
|
||||
optimizer.apply_one(pos, ValueOrInst::Inst(inst))
|
||||
{
|
||||
// We transplanted a new instruction into the current
|
||||
// instruction, so the "new" instruction is actually the same
|
||||
// one, just with different data.
|
||||
debug_assert_eq!(new_inst, inst);
|
||||
}
|
||||
debug_assert_eq!(pos.current_inst(), Some(inst));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "enable-peepmatic"))]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::ir::{
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{B8, I16, I32, I8},
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct PeepholeOptimizer<'a, 'b> {
|
||||
phantom: PhantomData<(&'a (), &'b ())>,
|
||||
}
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
PeepholeOptimizer {
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
_optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
native_word_width: u32,
|
||||
) {
|
||||
simplify(pos, inst, native_word_width);
|
||||
branch_opt(pos, inst);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm64 { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode
|
||||
&& ty == pos.func.dfg.ctrl_typevar(arg_inst)
|
||||
{
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
|
||||
match (opcode, imm.into()) {
|
||||
(Opcode::IaddImm, 0)
|
||||
| (Opcode::ImulImm, 1)
|
||||
| (Opcode::SdivImm, 1)
|
||||
| (Opcode::UdivImm, 1)
|
||||
| (Opcode::BorImm, 0)
|
||||
| (Opcode::BandImm, -1)
|
||||
| (Opcode::BxorImm, 0)
|
||||
| (Opcode::RotlImm, 0)
|
||||
| (Opcode::RotrImm, 0)
|
||||
| (Opcode::IshlImm, 0)
|
||||
| (Opcode::UshrImm, 0)
|
||||
| (Opcode::SshrImm, 0) => {
|
||||
// Alias the result value with the original argument.
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
|
||||
}
|
||||
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
|
||||
// Replace by zero.
|
||||
pos.func.dfg.replace(inst).iconst(ty, 0);
|
||||
}
|
||||
(Opcode::BorImm, -1) => {
|
||||
// Replace by minus one.
|
||||
pos.func.dfg.replace(inst).iconst(ty, -1);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::IntCompare { opcode, cond, args } => {
|
||||
debug_assert_eq!(opcode, Opcode::Icmp);
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
|
||||
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::CondTrap { .. }
|
||||
| InstructionData::Branch { .. }
|
||||
| InstructionData::Ternary {
|
||||
opcode: Opcode::Select,
|
||||
..
|
||||
} => {
|
||||
// Fold away a redundant `bint`.
|
||||
let condition_def = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
pos.func.dfg.value_def(args[0])
|
||||
};
|
||||
if let ValueDef::Result(def_inst, _) = condition_def {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Bint,
|
||||
arg: bool_val,
|
||||
} = pos.func.dfg[def_inst]
|
||||
{
|
||||
let args = pos.func.dfg.inst_args_mut(inst);
|
||||
args[0] = bool_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Ternary {
|
||||
opcode: Opcode::Bitselect,
|
||||
args,
|
||||
} => {
|
||||
let old_cond_type = pos.func.dfg.value_type(args[0]);
|
||||
if !old_cond_type.is_vector() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Replace bitselect with vselect if each lane of the controlling mask is either
|
||||
// all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
|
||||
// while vselect can be encoded using a single BLEND instruction.
|
||||
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
|
||||
let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
|
||||
InstructionData::Unary {
|
||||
opcode: Opcode::RawBitcast,
|
||||
arg,
|
||||
} => {
|
||||
// If the controlling mask is a raw-bitcasted boolean vector then
|
||||
// we know each lane is either all zeroes or ones,
|
||||
// so we can use vselect instruction instead.
|
||||
let arg_type = pos.func.dfg.value_type(arg);
|
||||
if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
|
||||
return;
|
||||
}
|
||||
(arg, arg_type)
|
||||
}
|
||||
InstructionData::UnaryConst {
|
||||
opcode: Opcode::Vconst,
|
||||
constant_handle,
|
||||
} => {
|
||||
// If each byte of controlling mask is 0x00 or 0xFF then
|
||||
// we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
|
||||
// Bitselect operates at bit level, so the lane types don't matter.
|
||||
let const_data = pos.func.dfg.constants.get(constant_handle);
|
||||
if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
|
||||
return;
|
||||
}
|
||||
let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
|
||||
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
|
||||
let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
|
||||
let old_arg_type = pos.func.dfg.value_type(args[1]);
|
||||
|
||||
if arg_type != old_arg_type {
|
||||
// Operands types must match, we need to add bitcasts.
|
||||
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
|
||||
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
|
||||
let ret = pos.ins().vselect(cond_val, arg1, arg2);
|
||||
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
|
||||
} else {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.vselect(cond_val, args[1], args[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
struct BranchOptInfo {
|
||||
br_inst: Inst,
|
||||
cmp_arg: Value,
|
||||
args: ValueList,
|
||||
new_opcode: Opcode,
|
||||
}
|
||||
|
||||
/// Fold comparisons into branch operations when possible.
|
||||
///
|
||||
/// This matches against operations which compare against zero, then use the
|
||||
/// result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
/// single `brz` or `brnz`.
|
||||
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
|
||||
let mut info = if let InstructionData::Branch {
|
||||
opcode: br_opcode,
|
||||
args: ref br_args,
|
||||
..
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let first_arg = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
args[0]
|
||||
};
|
||||
|
||||
let icmp_inst =
|
||||
if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
icmp_inst
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let InstructionData::IntCompareImm {
|
||||
opcode: Opcode::IcmpImm,
|
||||
arg: cmp_arg,
|
||||
cond: cmp_cond,
|
||||
imm: cmp_imm,
|
||||
} = pos.func.dfg[icmp_inst]
|
||||
{
|
||||
let cmp_imm: i64 = cmp_imm.into();
|
||||
if cmp_imm != 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// icmp_imm returns non-zero when the comparison is true. So, if
|
||||
// we're branching on zero, we need to invert the condition.
|
||||
let cond = match br_opcode {
|
||||
Opcode::Brz => cmp_cond.inverse(),
|
||||
Opcode::Brnz => cmp_cond,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let new_opcode = match cond {
|
||||
IntCC::Equal => Opcode::Brz,
|
||||
IntCC::NotEqual => Opcode::Brnz,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
BranchOptInfo {
|
||||
br_inst: inst,
|
||||
cmp_arg,
|
||||
args: br_args.clone(),
|
||||
new_opcode,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
|
||||
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
|
||||
*opcode = info.new_opcode;
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The main pre-opt pass.
|
||||
pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
|
||||
let _tt = timing::preopt();
|
||||
|
||||
let mut pos = FuncCursor::new(func);
|
||||
let native_word_width = isa.pointer_bytes();
|
||||
let native_word_width = isa.pointer_bytes() as u32;
|
||||
let mut optimizer = simplify::peephole_optimizer(isa);
|
||||
|
||||
while let Some(block) = pos.next_block() {
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
// Apply basic simplifications.
|
||||
simplify(&mut pos, inst, native_word_width as u32);
|
||||
simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
|
||||
|
||||
// Try to transform divide-by-constant into simpler operations.
|
||||
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
|
||||
|
@ -960,7 +1099,6 @@ pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn Targ
|
|||
continue;
|
||||
}
|
||||
|
||||
branch_opt(&mut pos, inst);
|
||||
branch_order(&mut pos, cfg, block, inst);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@ define_passes! {
|
|||
gvn: "Global value numbering",
|
||||
licm: "Loop invariant code motion",
|
||||
unreachable_code: "Remove unreachable blocks",
|
||||
remove_constant_phis: "Remove constant phi-nodes",
|
||||
|
||||
regalloc: "Register allocation",
|
||||
ra_liveness: "RA liveness analysis",
|
||||
|
|
|
@ -18,9 +18,9 @@ use serde::{Deserialize, Serialize};
|
|||
pub struct ValueLocRange {
|
||||
/// The ValueLoc containing a ValueLabel during this range.
|
||||
pub loc: ValueLoc,
|
||||
/// The start of the range.
|
||||
/// The start of the range. It is an offset in the generated code.
|
||||
pub start: u32,
|
||||
/// The end of the range.
|
||||
/// The end of the range. It is an offset in the generated code.
|
||||
pub end: u32,
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,11 @@ pub fn build_value_labels_ranges<T>(
|
|||
where
|
||||
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
|
||||
{
|
||||
// FIXME(#1523): New-style backend does not yet have debug info.
|
||||
if isa.get_mach_backend().is_some() {
|
||||
return HashMap::new();
|
||||
}
|
||||
|
||||
let values_labels = build_value_labels_index::<T>(func);
|
||||
|
||||
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
|
||||
|
|
|
@ -756,10 +756,10 @@ impl<'a> Verifier<'a> {
|
|||
| UnaryIeee64 { .. }
|
||||
| UnaryBool { .. }
|
||||
| Binary { .. }
|
||||
| BinaryImm { .. }
|
||||
| BinaryImm8 { .. }
|
||||
| BinaryImm64 { .. }
|
||||
| Ternary { .. }
|
||||
| InsertLane { .. }
|
||||
| ExtractLane { .. }
|
||||
| TernaryImm8 { .. }
|
||||
| Shuffle { .. }
|
||||
| IntCompare { .. }
|
||||
| IntCompareImm { .. }
|
||||
|
@ -1912,20 +1912,20 @@ impl<'a> Verifier<'a> {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
ir::InstructionData::ExtractLane {
|
||||
ir::InstructionData::BinaryImm8 {
|
||||
opcode: ir::instructions::Opcode::Extractlane,
|
||||
lane,
|
||||
imm: lane,
|
||||
arg,
|
||||
..
|
||||
}
|
||||
| ir::InstructionData::InsertLane {
|
||||
| ir::InstructionData::TernaryImm8 {
|
||||
opcode: ir::instructions::Opcode::Insertlane,
|
||||
lane,
|
||||
imm: lane,
|
||||
args: [arg, _],
|
||||
..
|
||||
} => {
|
||||
// We must be specific about the opcodes above because other instructions are using
|
||||
// the ExtractLane/InsertLane formats.
|
||||
// the same formats.
|
||||
let ty = self.func.dfg.value_type(arg);
|
||||
if u16::from(lane) >= ty.lane_count() {
|
||||
errors.fatal((
|
||||
|
|
|
@@ -508,7 +508,8 @@ pub fn write_operands(
constant_handle, ..
} => write!(w, " {}", constant_handle),
Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]),
BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
BinaryImm8 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
BinaryImm64 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]),
MultiAry { ref args, .. } => {
if args.is_empty() {

@@ -518,8 +519,7 @@ pub fn write_operands(
}
}
NullAry { .. } => write!(w, " "),
InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm),
Shuffle { mask, args, .. } => {
let data = dfg.immediates.get(mask).expect(
"Expected the shuffle mask to already be inserted into the immediates table",

@@ -1 +1 @@
{"files":{"Cargo.toml":"cd1dd7e4040349ff8e5e88cbc3273c2b52cb411853933de6aea8976a1a99445f","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"f6d738a46f1dca8b0c82a5910d86cd572a3585ab7ef9f73dac96962529069190","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"dccb3dd6fc87eba0101de56417f21cab67a4394831df9fa41e3bbddb70cdf694","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"c4ee5d42f3f76a1458ec0d97b5777569906819fe5b4002512de0e69814754c53","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.63.0"
version = "0.64.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

@@ -85,6 +85,10 @@ macro_rules! entity_impl {
fn reserved_value() -> $entity {
$entity($crate::__core::u32::MAX)
}

fn is_reserved_value(&self) -> bool {
self.0 == $crate::__core::u32::MAX
}
}

impl $entity {
@@ -11,9 +11,11 @@ use core::fmt;
use core::mem;

/// Types that have a reserved value which can't be created any other way.
pub trait ReservedValue: Eq {
pub trait ReservedValue {
/// Create an instance of the reserved value.
fn reserved_value() -> Self;
/// Checks whether value is the reserved one.
fn is_reserved_value(&self) -> bool;
}

/// Packed representation of `Option<T>`.

@@ -23,12 +25,12 @@ pub struct PackedOption<T: ReservedValue>(T);
impl<T: ReservedValue> PackedOption<T> {
/// Returns `true` if the packed option is a `None` value.
pub fn is_none(&self) -> bool {
self.0 == T::reserved_value()
self.0.is_reserved_value()
}

/// Returns `true` if the packed option is a `Some` value.
pub fn is_some(&self) -> bool {
self.0 != T::reserved_value()
!self.0.is_reserved_value()
}

/// Expand the packed option into a normal `Option`.

@@ -75,7 +77,7 @@ impl<T: ReservedValue> From<T> for PackedOption<T> {
/// Convert `t` into a packed `Some(x)`.
fn from(t: T) -> Self {
debug_assert!(
t != T::reserved_value(),
!t.is_reserved_value(),
"Can't make a PackedOption from the reserved value."
);
Self(t)

@@ -123,6 +125,10 @@ mod tests {
fn reserved_value() -> Self {
NoC(13)
}

fn is_reserved_value(&self) -> bool {
self.0 == 13
}
}

#[test]

@@ -145,6 +151,10 @@ mod tests {
fn reserved_value() -> Self {
Ent(13)
}

fn is_reserved_value(&self) -> bool {
self.0 == 13
}
}

#[test]

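For context, a minimal sketch of how downstream code satisfies the reworked `ReservedValue` trait after this change (the `Eq` bound is gone and `is_reserved_value` is now required). The `Slot` index type is hypothetical, and the example assumes `cranelift-entity` 0.64 as a dependency; it mirrors the pattern used by the tests above.

use cranelift_entity::packed_option::{PackedOption, ReservedValue};

// Hypothetical 32-bit index type; u32::MAX is the reserved sentinel, so
// PackedOption<Slot> stays the same size as a bare Slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Slot(u32);

impl ReservedValue for Slot {
    fn reserved_value() -> Self {
        Slot(u32::MAX)
    }
    fn is_reserved_value(&self) -> bool {
        self.0 == u32::MAX
    }
}

fn main() {
    let none: PackedOption<Slot> = PackedOption::default(); // packed `None`
    assert!(none.is_none());

    let some: PackedOption<Slot> = Slot(7).into(); // packed `Some(Slot(7))`
    assert!(some.is_some());
    assert_eq!(some.expand(), Some(Slot(7)));
}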
@@ -1 +1 @@
{"files":{"Cargo.toml":"d152c6553c0091b43d9ea0cd547dc49440e6321eb792bf47fdd3245aed046513","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"f750cc995c66635dab7f2b977266cf9235d984b585ab8145bdb858ea8e1b0fb4","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"084cc46ba2d09a2ee8085c37be8624b3cc249d381f1cbee6df468930ce15e415","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"d1d8477572f70cc28f71424af272d9eec0adf58af657ff153c4acbbb39822a50","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-frontend"
version = "0.63.0"
version = "0.64.0"
description = "Cranelift IR builder helper"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-frontend"

@@ -11,7 +11,7 @@ readme = "README.md"
edition = "2018"

[dependencies]
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
hashbrown = { version = "0.7", optional = true }

@@ -272,6 +272,12 @@ impl<'a> FunctionBuilder<'a> {

/// In order to use a variable in a `use_var`, you need to declare its type with this method.
pub fn declare_var(&mut self, var: Variable, ty: Type) {
debug_assert_eq!(
self.func_ctx.types[var],
types::INVALID,
"variable {:?} is declared twice",
var
);
self.func_ctx.types[var] = ty;
}

@@ -285,6 +291,12 @@ impl<'a> FunctionBuilder<'a> {
var
)
});
debug_assert_ne!(
ty,
types::INVALID,
"variable {:?} is used but its type has not been declared",
var
);
self.func_ctx
.ssa
.use_var(self.func, var, ty, self.position.unwrap())

@@ -1 +1 @@
{"files":{"Cargo.toml":"107a12d0bc1ee99c8ffd9cf746c4d06040a90bd5769fc29d36a88371d09a67b2","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"cce724251d4abc08c6492e1e25c138ab5a0d11e9ac90bc573652b18e034f56ed","src/code_translator.rs":"4b70704fd50b24cd695d0a469d92f06d4a4fc328f24247a6c7a1ba39ac301ee0","src/environ/dummy.rs":"49bce7a8eb9f21a61c12db537b51ab6bdb3d0e1eb6253084268256d96cae68a5","src/environ/mod.rs":"b6f33f619090ff497b4e22150d77a290f259716374ac2e377b73c47cd1dafe85","src/environ/spec.rs":"3a1543f99bff340c7f6bbe3f7cb8e8ec829e4139957f3c578d5b03e29df50f9e","src/func_translator.rs":"a165063eafedbb8e6b632996f747eeb49a3d6f8a70cab6d741abfc4fd9af892d","src/lib.rs":"05b9994c062faf2065046d1e4d7caffb26823816f367d77ede6918e24fcfa6b0","src/module_translator.rs":"bcdf5a84226b726a73f4be0acb0318ca89c82584460101378e73021d85bd4485","src/sections_translator.rs":"8c4c24308332c63d16fcf19693a7ecff2239e73b4752b0d3830b273fabcee9f1","src/state/func_state.rs":"b114522784984a7cc26a3549c7c17f842885e1232254de81d938f9d155f95aa6","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"2f299b043deb806b48583fe54bbb46708f7d8a1454b7be0eb285568064e5a7f9","src/translation_utils.rs":"a1723cf6c216edd8aa845c61b80907167569f0c830344e0f2dc86a7232d45c5c","tests/wasm_testsuite.rs":"730304f139371e5ef3fd913ec271fc4db181869b447c6ed26c54313b5c31495c"},"package":null}
{"files":{"Cargo.toml":"3dd16e5f91cb20bc9afaff9880e1035d1c33c68851f593e6f2c5a0c92e292133","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"cce724251d4abc08c6492e1e25c138ab5a0d11e9ac90bc573652b18e034f56ed","src/code_translator.rs":"f9befe6f5a53eede1e9937abe0bced442f8c0276996bfb4d77c27e81d4746b4f","src/environ/dummy.rs":"07b6510a7141b92769c914e37386790486f92b691beb0876b8590f2ae5489ee4","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"2ff8524cd592efdef67e5f8d06d144f7d628dee8183848ff4f5e35850f3ce550","src/func_translator.rs":"eb1fcea970407eda872984808e9a3e3a3297c2dea6e3a600ee7116ca89c7b49f","src/lib.rs":"6d3662b3f219a3f7a26f6b44b7921a19da1d892cf78f5a4434fdced5753b069f","src/module_translator.rs":"bcdf5a84226b726a73f4be0acb0318ca89c82584460101378e73021d85bd4485","src/sections_translator.rs":"db567511e273a9e383b18a15fc47f74a1247cbe13f120d7656c21660be53ab78","src/state/func_state.rs":"b114522784984a7cc26a3549c7c17f842885e1232254de81d938f9d155f95aa6","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"20082fded6a8d3637eccbda4465355d8d9fab0a1cd8222accb10cb3e06543689","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
@@ -1,6 +1,6 @@
[package]
name = "cranelift-wasm"
version = "0.63.0"
version = "0.64.0"
authors = ["The Cranelift Project Developers"]
description = "Translator from WebAssembly to Cranelift IR"
documentation = "https://docs.rs/cranelift-wasm"

@@ -12,20 +12,20 @@ keywords = ["webassembly", "wasm"]
edition = "2018"

[dependencies]
wasmparser = { version = "0.51.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.63.0" }
cranelift-frontend = { path = "../frontend", version = "0.63.0", default-features = false }
wasmparser = { version = "0.57.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.64.0" }
cranelift-frontend = { path = "../frontend", version = "0.64.0", default-features = false }
hashbrown = { version = "0.7", optional = true }
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
thiserror = "1.0.4"

[dev-dependencies]
wat = "1.0.9"
wat = "1.0.18"
target-lexicon = "0.10"
# Enable the riscv feature for cranelift-codegen, as some tests require it
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false, features = ["riscv"] }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false, features = ["riscv"] }

[features]
default = ["std"]

@@ -125,7 +125,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
GlobalVariable::Memory { gv, offset, ty } => {
let addr = builder.ins().global_value(environ.pointer_type(), gv);
let flags = ir::MemFlags::trusted();
let val = state.pop1();
let mut val = state.pop1();
// Ensure SIMD values are cast to their default Cranelift type, I8x16.
if ty.is_vector() {
val = optionally_bitcast_vector(val, I8X16, builder);
}
debug_assert_eq!(ty, builder.func.dfg.value_type(val));
builder.ins().store(flags, val, addr, offset);
}

@@ -357,7 +361,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// We signal that all the code that follows until the next End is unreachable
frame.set_branched_to_exit();
let return_count = if frame.is_loop() {
0
frame.num_param_values()
} else {
frame.num_return_values()
};

@@ -1035,8 +1039,8 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::F32Le | Operator::F64Le => {
translate_fcmp(FloatCC::LessThanOrEqual, builder, state)
}
Operator::RefNull => state.push1(builder.ins().null(environ.reference_type())),
Operator::RefIsNull => {
Operator::RefNull { ty: _ } => state.push1(builder.ins().null(environ.reference_type())),
Operator::RefIsNull { ty: _ } => {
let arg = state.pop1();
let val = builder.ins().is_null(arg);
let val_int = builder.ins().bint(I32, val);

@@ -1167,23 +1171,26 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
)?);
}
Operator::TableGrow { table } => {
let table_index = TableIndex::from_u32(*table);
let delta = state.pop1();
let init_value = state.pop1();
state.push1(environ.translate_table_grow(
builder.cursor(),
*table,
table_index,
delta,
init_value,
)?);
}
Operator::TableGet { table } => {
let table_index = TableIndex::from_u32(*table);
let index = state.pop1();
state.push1(environ.translate_table_get(builder.cursor(), *table, index)?);
state.push1(environ.translate_table_get(builder.cursor(), table_index, index)?);
}
Operator::TableSet { table } => {
let table_index = TableIndex::from_u32(*table);
let value = state.pop1();
let index = state.pop1();
environ.translate_table_set(builder.cursor(), *table, value, index)?;
environ.translate_table_set(builder.cursor(), table_index, value, index)?;
}
Operator::TableCopy {
dst_table: dst_table_index,
@@ -1206,10 +1213,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
)?;
}
Operator::TableFill { table } => {
let table_index = TableIndex::from_u32(*table);
let len = state.pop1();
let val = state.pop1();
let dest = state.pop1();
environ.translate_table_fill(builder.cursor(), *table, dest, val, len)?;
environ.translate_table_fill(builder.cursor(), table_index, dest, val, len)?;
}
Operator::TableInit {
segment,

@@ -1302,7 +1310,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let ty = type_of(op);
let reduced = builder.ins().ireduce(ty.lane_type(), replacement);
let vector = optionally_bitcast_vector(vector, ty, builder);
state.push1(builder.ins().insertlane(vector, *lane, reduced))
state.push1(builder.ins().insertlane(vector, reduced, *lane))
}
Operator::I32x4ReplaceLane { lane }
| Operator::I64x2ReplaceLane { lane }

@@ -1310,7 +1318,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::F64x2ReplaceLane { lane } => {
let (vector, replacement) = state.pop2();
let vector = optionally_bitcast_vector(vector, type_of(op), builder);
state.push1(builder.ins().insertlane(vector, *lane, replacement))
state.push1(builder.ins().insertlane(vector, replacement, *lane))
}
Operator::V8x16Shuffle { lanes, .. } => {
let (a, b) = pop2_with_bitcast(state, I8X16, builder);

@@ -1375,7 +1383,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().ineg(a))
}
Operator::I16x8Mul | Operator::I32x4Mul => {
Operator::I16x8Mul | Operator::I32x4Mul | Operator::I64x2Mul => {
let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().imul(a, b))
}

@@ -1402,7 +1410,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16Shl | Operator::I16x8Shl | Operator::I32x4Shl | Operator::I64x2Shl => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);

@@ -1411,16 +1419,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16ShrU | Operator::I16x8ShrU | Operator::I32x4ShrU | Operator::I64x2ShrU => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
state.push1(builder.ins().ushr(bitcast_a, b_mod_bitwidth))
}
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS => {
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS | Operator::I64x2ShrS => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
@@ -1435,18 +1443,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// operands must match (hence the bitcast).
state.push1(builder.ins().bitselect(bitcast_c, bitcast_a, bitcast_b))
}
Operator::I8x16AnyTrue
| Operator::I16x8AnyTrue
| Operator::I32x4AnyTrue
| Operator::I64x2AnyTrue => {
Operator::I8x16AnyTrue | Operator::I16x8AnyTrue | Operator::I32x4AnyTrue => {
let a = pop1_with_bitcast(state, type_of(op), builder);
let bool_result = builder.ins().vany_true(a);
state.push1(builder.ins().bint(I32, bool_result))
}
Operator::I8x16AllTrue
| Operator::I16x8AllTrue
| Operator::I32x4AllTrue
| Operator::I64x2AllTrue => {
Operator::I8x16AllTrue | Operator::I16x8AllTrue | Operator::I32x4AllTrue => {
let a = pop1_with_bitcast(state, type_of(op), builder);
let bool_result = builder.ins().vall_true(a);
state.push1(builder.ins().bint(I32, bool_result))

@@ -1542,16 +1544,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_from_sint(F32X4, a))
}
Operator::I8x16Mul
| Operator::I64x2Mul
| Operator::I64x2ShrS
| Operator::I32x4TruncSatF32x4S
Operator::I32x4TruncSatF32x4S
| Operator::I32x4TruncSatF32x4U
| Operator::I64x2TruncSatF64x2S
| Operator::I64x2TruncSatF64x2U
| Operator::F32x4ConvertI32x4U
| Operator::F64x2ConvertI64x2S
| Operator::F64x2ConvertI64x2U { .. }
| Operator::I8x16Abs
| Operator::I16x8Abs
| Operator::I32x4Abs
| Operator::I8x16NarrowI16x8S { .. }
| Operator::I8x16NarrowI16x8U { .. }
| Operator::I16x8NarrowI32x4S { .. }

@@ -1566,6 +1564,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4WidenHighI16x8U { .. } => {
return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
}

Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
}
};
Ok(())
}
@@ -1991,8 +1993,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I8x16MinU
| Operator::I8x16MaxS
| Operator::I8x16MaxU
| Operator::I8x16RoundingAverageU
| Operator::I8x16Mul => I8X16,
| Operator::I8x16RoundingAverageU => I8X16,

Operator::I16x8Splat
| Operator::V16x8LoadSplat { .. }

@@ -2063,15 +2064,12 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I64x2ExtractLane { .. }
| Operator::I64x2ReplaceLane { .. }
| Operator::I64x2Neg
| Operator::I64x2AnyTrue
| Operator::I64x2AllTrue
| Operator::I64x2Shl
| Operator::I64x2ShrS
| Operator::I64x2ShrU
| Operator::I64x2Add
| Operator::I64x2Sub
| Operator::F64x2ConvertI64x2S
| Operator::F64x2ConvertI64x2U => I64X2,
| Operator::I64x2Mul => I64X2,

Operator::F32x4Splat
| Operator::F32x4ExtractLane { .. }

@@ -2111,9 +2109,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::F64x2Mul
| Operator::F64x2Div
| Operator::F64x2Min
| Operator::F64x2Max
| Operator::I64x2TruncSatF64x2S
| Operator::I64x2TruncSatF64x2U => F64X2,
| Operator::F64x2Max => F64X2,

_ => unimplemented!(
"Currently only SIMD instructions are mapped to their return type; the \

@@ -6,7 +6,8 @@
//! [Wasmtime]: https://github.com/bytecodealliance/wasmtime

use crate::environ::{
FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment, WasmResult,
FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment,
WasmFuncType, WasmResult,
};
use crate::func_translator::FuncTranslator;
use crate::state::ModuleTranslationState;

@@ -433,7 +434,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_grow(
&mut self,
mut pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_delta: ir::Value,
_init_value: ir::Value,
) -> WasmResult<ir::Value> {

@@ -443,7 +444,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_index: ir::Value,
) -> WasmResult<ir::Value> {
Ok(pos.ins().null(self.reference_type()))

@@ -452,7 +453,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_set(
&mut self,
_pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_value: ir::Value,
_index: ir::Value,
) -> WasmResult<()> {

@@ -476,7 +477,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_fill(
&mut self,
_pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_dst: ir::Value,
_val: ir::Value,
_len: ir::Value,

@@ -534,7 +535,7 @@ impl TargetEnvironment for DummyEnvironment {
}

impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()> {
fn declare_signature(&mut self, _wasm: &WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
self.info.signatures.push(sig);
Ok(())
}

Some files were not shown because too many files have changed in this diff.