Bug 1641504: Bump Cranelift to e3d89c8a92a5fadedd75359b8485d23ac45ecf29. r=bbouvier

Differential Revision: https://phabricator.services.mozilla.com/D78587
This commit is contained in:
Chris Fallin 2020-06-09 22:37:06 +00:00
Parent e12b4c3ef8
Commit 6944be383a
146 changed files with 19611 additions and 9495 deletions

View file

@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
[source."https://github.com/badboy/failure"]
git = "https://github.com/badboy/failure"

46
Cargo.lock generated
View file

@ -763,22 +763,22 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
dependencies = [
"cranelift-entity 0.63.0",
"cranelift-entity 0.64.0",
]
[[package]]
name = "cranelift-codegen"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
dependencies = [
"byteorder",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity 0.63.0",
"cranelift-entity 0.64.0",
"log",
"regalloc",
"smallvec",
@ -788,17 +788,17 @@ dependencies = [
[[package]]
name = "cranelift-codegen-meta"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.63.0",
"cranelift-entity 0.64.0",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
[[package]]
name = "cranelift-entity"
@ -807,13 +807,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9
[[package]]
name = "cranelift-entity"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
[[package]]
name = "cranelift-frontend"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
dependencies = [
"cranelift-codegen",
"log",
@ -823,15 +823,15 @@ dependencies = [
[[package]]
name = "cranelift-wasm"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.63.0",
"cranelift-entity 0.64.0",
"cranelift-frontend",
"log",
"thiserror",
"wasmparser 0.51.4",
"wasmparser 0.57.0",
]
[[package]]
@ -3956,9 +3956,9 @@ dependencies = [
[[package]]
name = "regalloc"
version = "0.0.21"
version = "0.0.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b27b256b41986ac5141b37b8bbba85d314fbf546c182eb255af6720e07e4f804"
checksum = "cca5b48c9db66c5ba084e4660b4c0cfe8b551a96074bc04b7c11de86ad0bf1f9"
dependencies = [
"log",
"rustc-hash",
@ -5375,9 +5375,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
[[package]]
name = "wasmparser"
version = "0.51.4"
version = "0.57.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aeb1956b19469d1c5e63e459d29e7b5aa0f558d9f16fcef09736f8a265e6c10a"
checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"
[[package]]
name = "wast"

View file

@ -76,8 +76,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f
[patch.crates-io.cranelift-codegen]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
[patch.crates-io.cranelift-wasm]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"

View file

@ -13,8 +13,8 @@ name = "baldrdash"
# cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
# you should update the following $TOP_LEVEL/Cargo.toml file: look for the
# revision (rev) hashes of both cranelift dependencies (codegen and wasm).
cranelift-codegen = { version = "0.63.0", default-features = false }
cranelift-wasm = "0.63.0"
cranelift-codegen = { version = "0.64.0", default-features = false }
cranelift-wasm = "0.64.0"
log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
env_logger = "0.6"
smallvec = "1.0"

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"07d7670bb6f0c26fa3abb5d547d645b8b6ab32378dba33e3453122c8ba59c6b5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"fe108380fdfaac0d92a92302d0751df182b888e874e56e465f4241dbb670a92e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}

View file

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.63.0"
version = "0.64.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"
[dependencies]
cranelift-entity = { path = "../entity", version = "0.63.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.64.0", default-features = false }
[badges]
maintenance = { status = "experimental" }

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"2d1fae4231bb7d3c43ebcaccbc62d243440ab537a5b6bd40c653ece0bcda5a75","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"b275053977c0239211c1df35253154ba4dce2519f506088e71104de37d3db862","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"fd5a7418fa0d47cdf1b823b31553f1549c03e160ffffac9e22d611185774367e","src/isa/x86/encodings.rs":"a19e5dd7ba7fe74f2ec0a2367e61e2dab498113f8b2a2f1bc677b6ee486358d5","src/isa/x86/instructions.rs":"144e83591444115f2ab8d16777e322eb5c9d8eef123ad05d0c66811a029b662b","src/isa/x86/legalize.rs":"d2eb6cee5c885870250417f4d9086527c96f994542c9316baf14776b500e45b0","src/isa/x86/mod.rs":"65953f998ff3fc3b333167e9979fc0f15f976b51ad75272ac19dcaad0981b371","src/isa/x86/opcodes.rs":"44556abfc4a319a6e48aa878f10550b7878725ba0bf75ddc9bb6a0e6f4223c73","src/isa/x86/recipes.rs":"f142ae4ea1db29df0f3c9aedf0c5ee228682136526499f0c85aab101375d0c8c","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"49abb46533b3a5415cd033e0a98b5c9561e231f2dd9510d587dc69b204bb6706","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"89ed4074f748637adf56b93ba952e398c45d43e6326d01676885939e3fe8bc4a","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"8df3abeb47b52b7dc99f6e0bb16cf8a695ce4fe0a8d86035945a2612d1aa5a6d","src/shared/legalize.rs":"bc9c3292446c1d338df1c4ce19f3ac5482cfe582a04a5a1e82fc9aaa6aef25ea","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"9460758f04ccfc9129ea4d4081571fe4a3ac574c3d25b6473f888fbbb506b9d3","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"a19ba59829e25d67120787a454038986a6759f7d592dcf427924ebbcb5de6697","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"87c70a4856bb1c40ba6babed549aa7e01478375244dea605be0334ae6d0441e0","src/isa/x86/instructions.rs":"a2c81ff80e30980fe444aa1e56ba57c54911cee67c392c16bfbdf28f75151dc6","src/isa/x86/legalize.rs":"b5f68ea089c4237c7140ef0b8ff71f7c6a5f53884bf2158d81b52d3750bcacac","src/isa/x86/mod.rs":"ecc1d4de51bd44dbaa864fafebb68f66bc99fb8c9ad67a0fcb420bd1f87d1524","src/isa/x86/opcodes.rs":"f98dd104910efbfa3c211080c68a17da607ce585b9d81bf22cb255e58e51f99f","src/isa/x86/recipes.rs":"b71a3746ed39b08932dc1a0ce885b61eec2e8daf2e92d12eccc0d085e4587a1f","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"69623c2193458c838617e52e88d3ff91b71f3f07aec1f1494c0cabd7c332ad49","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38b9a3b09bd86d020b841abe94eef003063b2cb12d9dc991a7743b2cc0bb3362","src/shared/legalize.rs":"55b186e09383cc16491a6a0dd79aa9149c1aba1927a7173701478818b8116795","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"0b4f903de5f2df19304c44bf4bd456c3a8e165103b38ccb13b6f88ae8a3c7ee8","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}

View file

@ -1,19 +1,19 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.63.0"
version = "0.64.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.63.0" }
cranelift-entity = { path = "../../entity", version = "0.63.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.64.0" }
cranelift-entity = { path = "../../entity", version = "0.64.0" }
[badges]
maintenance = { status = "experimental" }
[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]

View file

@ -874,17 +874,32 @@ fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) {
args.join(", ")
);
let imms_need_sign_extension = format
.imm_fields
.iter()
.any(|f| f.kind.rust_type == "ir::immediates::Imm64");
fmt.doc_comment(format.to_string());
fmt.line("#[allow(non_snake_case)]");
fmtln!(fmt, "fn {} {{", proto);
fmt.indent(|fmt| {
// Generate the instruction data.
fmtln!(fmt, "let data = ir::InstructionData::{} {{", format.name);
fmtln!(
fmt,
"let{} data = ir::InstructionData::{} {{",
if imms_need_sign_extension { " mut" } else { "" },
format.name
);
fmt.indent(|fmt| {
fmt.line("opcode,");
gen_member_inits(format, fmt);
});
fmtln!(fmt, "};");
if imms_need_sign_extension {
fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);");
}
fmt.line("self.build(data, ctrl_typevar)");
});
fmtln!(fmt, "}");
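The generated constructors above now mark `data` mutable and call `sign_extend_immediates` whenever the format carries an `ir::immediates::Imm64`. As a minimal standalone sketch of what sign-extending an immediate to the controlling type's width means (illustrative helper, not the actual Cranelift method):

fn sign_extend_from(imm: i64, ctrl_type_bits: u32) -> i64 {
    // Move the low `ctrl_type_bits` bits to the top, then arithmetic-shift back
    // down so the value's sign bit is replicated through bit 63.
    let shift = 64 - ctrl_type_bits;
    (imm << shift) >> shift
}

fn main() {
    assert_eq!(sign_extend_from(0xFF, 8), -1); // 8-bit 0xff reads back as -1
    assert_eq!(sign_extend_from(0x7F, 8), 127); // sign bit clear: unchanged
}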

View file

@ -64,7 +64,7 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
// R-type with an immediate shift amount instead of rs2.
recipes.push(
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4)
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
@ -79,11 +79,11 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
);
recipes.push(
EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4)
EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm,
&*formats.binary_imm64,
"imm",
12,
0,

View file

@ -689,6 +689,12 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
}
}
}
for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
e.enc_both(
bint.bind(*to).bind(*from),
rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
);
}
// Copy Special
// For x86-64, only define REX forms for now, since we can't describe the
@ -1448,6 +1454,7 @@ fn define_alu(
// x86 has a bitwise not instruction NOT.
e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));
// Also add `b1` encodings for the logic instructions.
// TODO: Should this be done with 8-bit instructions? It would improve partial register
@ -1487,8 +1494,13 @@ fn define_alu(
for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
// Cannot use enc_i32_i64 for this pattern because instructions require
// to bind any.
e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
e.enc32(
inst.bind(I32).bind(Any),
inst.bind(I32).bind(I16),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc32(
inst.bind(I32).bind(I32),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc64(
@ -1601,8 +1613,11 @@ fn define_simd(
let sadd_sat = shared.by_name("sadd_sat");
let scalar_to_vector = shared.by_name("scalar_to_vector");
let sload8x8 = shared.by_name("sload8x8");
let sload8x8_complex = shared.by_name("sload8x8_complex");
let sload16x4 = shared.by_name("sload16x4");
let sload16x4_complex = shared.by_name("sload16x4_complex");
let sload32x2 = shared.by_name("sload32x2");
let sload32x2_complex = shared.by_name("sload32x2_complex");
let spill = shared.by_name("spill");
let sqrt = shared.by_name("sqrt");
let sshr_imm = shared.by_name("sshr_imm");
@ -1611,11 +1626,15 @@ fn define_simd(
let store_complex = shared.by_name("store_complex");
let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
let uload16x4 = shared.by_name("uload16x4");
let uload16x4_complex = shared.by_name("uload16x4_complex");
let uload32x2 = shared.by_name("uload32x2");
let uload32x2_complex = shared.by_name("uload32x2_complex");
let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let x86_insertps = x86.by_name("x86_insertps");
let x86_movlhps = x86.by_name("x86_movlhps");
let x86_movsd = x86.by_name("x86_movsd");
@ -1626,6 +1645,8 @@ fn define_simd(
let x86_pmaxu = x86.by_name("x86_pmaxu");
let x86_pmins = x86.by_name("x86_pmins");
let x86_pminu = x86.by_name("x86_pminu");
let x86_pmullq = x86.by_name("x86_pmullq");
let x86_pmuludq = x86.by_name("x86_pmuludq");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_psll = x86.by_name("x86_psll");
@ -1636,6 +1657,7 @@ fn define_simd(
let x86_punpckl = x86.by_name("x86_punpckl");
// Shorthands for recipes.
let rec_blend = r.template("blend");
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
let rec_f_ib = r.template("f_ib");
let rec_fa = r.template("fa");
@ -1705,6 +1727,20 @@ fn define_simd(
e.enc_both_inferred(instruction, template);
}
// SIMD vselect; the controlling value of vselect is a boolean vector, so each lane should be
// either all ones or all zeroes, which makes it possible to always use the 8-bit PBLENDVB;
// for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let opcode = match ty.lane_bits() {
32 => &BLENDVPS,
64 => &BLENDVPD,
_ => &PBLENDVB,
};
let instruction = vselect.bind(vector(ty, sse_vector_size));
let template = rec_blend.opcodes(opcode);
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
}
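A rough standalone sketch of the byte-granular blend semantics the comment above relies on (illustrative, not the encoder): PBLENDVB takes each destination byte from the second source whenever the corresponding mask byte has its high bit set, so a boolean vector whose lanes are all ones or all zeroes selects whole lanes correctly at any lane width.

fn pblendvb(a: [u8; 16], b: [u8; 16], mask: [u8; 16]) -> [u8; 16] {
    let mut out = a;
    for i in 0..16 {
        // The high bit of each mask byte picks the byte from `b`.
        if mask[i] & 0x80 != 0 {
            out[i] = b[i];
        }
    }
    out
}

fn main() {
    let (a, b) = ([1u8; 16], [2u8; 16]);
    let mut mask = [0u8; 16];
    for m in mask.iter_mut().take(8) {
        *m = 0xff; // low lanes all ones, high lanes all zeroes
    }
    let out = pblendvb(a, b, mask);
    assert_eq!((out[0], out[15]), (2, 1));
}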
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
@ -1977,6 +2013,35 @@ fn define_simd(
}
}
// SIMD load extend (complex addressing)
let is_load_complex_length_two =
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
for (inst, opcodes) in &[
(uload8x8_complex, &PMOVZXBW),
(uload16x4_complex, &PMOVZXWD),
(uload32x2_complex, &PMOVZXDQ),
(sload8x8_complex, &PMOVSXBW),
(sload16x4_complex, &PMOVSXWD),
(sload32x2_complex, &PMOVSXDQ),
] {
for recipe in &[
rec_fldWithIndex,
rec_fldWithIndexDisp8,
rec_fldWithIndexDisp32,
] {
let template = recipe.opcodes(*opcodes);
let predicate = |encoding: EncodingBuilder| {
encoding
.isa_predicate(use_sse41_simd)
.inst_predicate(is_load_complex_length_two.clone())
};
e.enc32_func(inst.clone(), template.clone(), predicate);
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
e.enc64_func(inst.clone(), template.rex(), predicate);
e.enc64_func(inst.clone(), template, predicate);
}
}
// SIMD integer addition
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
let iadd = iadd.bind(vector(*ty, sse_vector_size));
@ -2036,12 +2101,14 @@ fn define_simd(
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
}
// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
// SIMD integer multiplication for I64x2 using AVX512.
{
let imul = imul.bind(vector(I64, sse_vector_size));
e.enc_32_64_maybe_isap(
imul,
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
x86_pmullq,
rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
);
}
@ -2117,8 +2184,11 @@ fn define_simd(
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
// One exception: PSRAQ does not exist for 64x2 in SSE2; it requires a higher CPU feature set.
if *ty != I64 {
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}
}
// SIMD integer comparisons
@ -2223,8 +2293,7 @@ fn define_entity_ref(
let rec_gvaddr8 = r.template("gvaddr8");
let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
let rec_spaddr4_id = r.template("spaddr4_id");
let rec_spaddr8_id = r.template("spaddr8_id");
let rec_spaddr_id = r.template("spaddr_id");
// Predicates shorthands.
let all_ones_funcaddrs_and_not_is_pic =
@ -2312,8 +2381,8 @@ fn define_entity_ref(
//
// TODO: Add encoding rules for stack_load and stack_store, so that they
// don't get legalized to stack_addr + load/store.
e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());
e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
// Constant addresses (PIC).
e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());

View file

@ -283,7 +283,7 @@ pub(crate) fn define(
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
register and re-orders the data according to the passed immediate byte.
"#,
&formats.extract_lane,
&formats.binary_imm8,
)
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
.operands_out(vec![a]),
@ -314,7 +314,7 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.extract_lane,
&formats.binary_imm8,
)
.operands_in(vec![x, Idx])
.operands_out(vec![a]),
@ -342,9 +342,9 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.insert_lane,
&formats.ternary_imm8,
)
.operands_in(vec![x, Idx, y])
.operands_in(vec![x, y, Idx])
.operands_out(vec![a]),
);
@ -369,9 +369,9 @@ pub(crate) fn define(
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
floats, which are already stored in an XMM register.
"#,
&formats.insert_lane,
&formats.ternary_imm8,
)
.operands_in(vec![x, Idx, y])
.operands_in(vec![x, y, Idx])
.operands_out(vec![a]),
);
@ -475,10 +475,11 @@ pub(crate) fn define(
.includes_scalars(false)
.build(),
);
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
the lower lane to form the integer)",
let I128 = &TypeVar::new(
"I128",
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
upper lane is concatenated with the lower lane to form the integer)",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
@ -487,7 +488,7 @@ pub(crate) fn define(
);
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
let a = &Operand::new("a", IxN);
ig.push(
@ -532,6 +533,47 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing two 64-bit integers",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", I64x2);
let y = &Operand::new("y", I64x2);
let a = &Operand::new("a", I64x2);
ig.push(
Inst::new(
"x86_pmullq",
r#"
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
encodings for CPUs with newer vector features.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pmuludq",
r#"
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
overflow as in `x86_pmullq`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
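A standalone sketch of the `x86_pmuludq` lane semantics documented above (illustrative only): because each operand lane contributes only its low doubleword, the 32x32-bit product always fits in the 64-bit result lane, so no overflow handling is needed.

fn pmuludq_lane(x: u64, y: u64) -> u64 {
    // Only the bottom 32 bits of each lane participate in the multiply.
    (x & 0xffff_ffff) * (y & 0xffff_ffff)
}

fn main() {
    // Even the largest possible product fits in 64 bits.
    assert_eq!(pmuludq_lane(u64::MAX, u64::MAX), 0xffff_fffe_0000_0001);
}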
let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let f = &Operand::new("f", iflags);

View file

@ -8,7 +8,7 @@ use crate::shared::Definitions as SharedDefinitions;
#[allow(clippy::many_single_char_names)]
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let mut group = TransformGroupBuilder::new(
let mut expand = TransformGroupBuilder::new(
"x86_expand",
r#"
Legalize instructions by expansion.
@ -18,6 +18,37 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
.isa("x86")
.chain_with(shared.transform_groups.by_name("expand_flags").id);
let mut narrow = TransformGroupBuilder::new(
"x86_narrow",
r#"
Legalize instructions by narrowing.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
let mut narrow_avx = TransformGroupBuilder::new(
"x86_narrow_avx",
r#"
Legalize instructions by narrowing with CPU feature checks.
This special case converts using x86 AVX instructions where available."#,
)
.isa("x86");
// We cannot chain with the x86_narrow group until this group is built; see the bottom of this
// function for where this is chained.
let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);
// List of instructions.
let insts = &shared.instructions;
let band = insts.by_name("band");
@ -37,6 +68,8 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let isub = insts.by_name("isub");
let ishl = insts.by_name("ishl");
let ireduce = insts.by_name("ireduce");
let popcnt = insts.by_name("popcnt");
let sdiv = insts.by_name("sdiv");
let selectif = insts.by_name("selectif");
@ -45,6 +78,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let tls_value = insts.by_name("tls_value");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr = insts.by_name("ushr");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
@ -55,14 +89,40 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let imm = &shared.imm;
// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
// not encodable.
let a = var("a");
let x = var("x");
let y = var("y");
let z = var("z");
for &ty in &[I8, I16, I32] {
let ishl_by_i64 = ishl.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ishl_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}
for &ty in &[I8, I16, I32] {
let ushr_by_i64 = ushr.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ushr_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ushr(x, z))],
);
}
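A quick standalone check of the reasoning in the comment above (hypothetical helper names, not the generated legalization): for a value of 32 bits or narrower, only the shift amount modulo the operand width matters, so an I64-typed amount can be narrowed with `ireduce` before shifting.

fn main() {
    let x: u32 = 0x1234_5678;
    let amt: u64 = 35; // an I64-typed shift amount
    let reduced = amt as u32; // the effect of ireduce.I32
    // wrapping_shl masks the count by 31, mirroring the mod-32 behaviour.
    assert_eq!(x.wrapping_shl(reduced), x.wrapping_shl((amt % 32) as u32));
}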
// Division and remainder.
//
// The srem expansion requires custom code because srem INT_MIN, -1 is not
// allowed to trap. The other ops need to check avoid_div_traps.
group.custom_legalize(sdiv, "expand_sdivrem");
group.custom_legalize(srem, "expand_sdivrem");
group.custom_legalize(udiv, "expand_udivrem");
group.custom_legalize(urem, "expand_udivrem");
expand.custom_legalize(sdiv, "expand_sdivrem");
expand.custom_legalize(srem, "expand_sdivrem");
expand.custom_legalize(udiv, "expand_udivrem");
expand.custom_legalize(urem, "expand_udivrem");
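A standalone sketch of why `srem` needs the custom `expand_sdivrem` path (illustrative, not the actual expansion): hardware IDIV faults on INT_MIN / -1, but the remainder in that case is defined to be 0, so the expansion must special-case a -1 divisor instead of trapping.

fn srem_no_trap(n: i32, d: i32) -> i32 {
    if d == -1 {
        0 // i32::MIN % -1 must yield 0 rather than fault like IDIV would
    } else {
        n % d
    }
}

fn main() {
    assert_eq!(srem_no_trap(i32::MIN, -1), 0);
    assert_eq!(srem_no_trap(7, 3), 1);
}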
// Double length (widening) multiplication.
let a = var("a");
@ -73,12 +133,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let res_lo = var("res_lo");
let res_hi = var("res_hi");
group.legalize(
expand.legalize(
def!(res_hi = umulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
);
group.legalize(
expand.legalize(
def!(res_hi = smulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
);
@ -97,7 +157,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
// Equality needs an explicit `ord` test which checks the parity bit.
group.legalize(
expand.legalize(
def!(a = fcmp(floatcc_eq, x, y)),
vec![
def!(a1 = fcmp(floatcc_ord, x, y)),
@ -105,7 +165,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
def!(a = band(a1, a2)),
],
);
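As a rough standalone model of the expansion above (plain Rust floats standing in for the XMM compare; the middle compare is elided by the hunk boundary): x86's UCOMIS* reports "equal" for unordered inputs too, so ordered equality is computed as the AND of an explicit ordered test and the raw equality result.

fn fcmp_eq_expanded(x: f64, y: f64) -> bool {
    // UCOMIS* sets ZF for both "equal" and "unordered" results...
    let raw_eq = x == y || x.is_nan() || y.is_nan();
    // ...so the explicit `ord` test (the parity-flag check) is ANDed in.
    let ord = !x.is_nan() && !y.is_nan();
    ord && raw_eq
}

fn main() {
    assert!(fcmp_eq_expanded(1.0, 1.0));
    assert!(!fcmp_eq_expanded(f64::NAN, 1.0));
    assert!(!fcmp_eq_expanded(f64::NAN, f64::NAN));
}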
group.legalize(
expand.legalize(
def!(a = fcmp(floatcc_ne, x, y)),
vec![
def!(a1 = fcmp(floatcc_uno, x, y)),
@ -130,20 +190,20 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
(floatcc_ugt, floatcc_ult),
(floatcc_uge, floatcc_ule),
] {
group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
}
// We need to modify the CFG for min/max legalization.
group.custom_legalize(fmin, "expand_minmax");
group.custom_legalize(fmax, "expand_minmax");
expand.custom_legalize(fmin, "expand_minmax");
expand.custom_legalize(fmax, "expand_minmax");
// Conversions from unsigned need special handling.
group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
// Conversions from float to int can trap and modify the control flow graph.
group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
// Count leading and trailing zeroes, for baseline x86_64
let c_minus_one = var("c_minus_one");
@ -158,7 +218,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
let imm64_63 = Literal::constant(&imm.imm64, 63);
group.legalize(
expand.legalize(
def!(a = clz.I64(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@ -170,7 +230,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
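A standalone sketch of the clz-via-BSR expansion above (illustrative; the -1 fallback models the `selectif` on the zero flag, since BSR leaves its result undefined for a zero input):

fn clz64_expanded(x: u64) -> u64 {
    // BSR yields the index of the highest set bit, or -1 here for zero.
    let index: i64 = if x == 0 {
        -1
    } else {
        63 - x.leading_zeros() as i64
    };
    (63 - index) as u64
}

fn main() {
    assert_eq!(clz64_expanded(0), 64);
    assert_eq!(clz64_expanded(1), 63);
    assert_eq!(clz64_expanded(u64::MAX), 0);
}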
let imm64_31 = Literal::constant(&imm.imm64, 31);
group.legalize(
expand.legalize(
def!(a = clz.I32(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@ -182,7 +242,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
let imm64_64 = Literal::constant(&imm.imm64, 64);
group.legalize(
expand.legalize(
def!(a = ctz.I64(x)),
vec![
def!(c_sixty_four = iconst(imm64_64)),
@ -192,7 +252,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
let imm64_32 = Literal::constant(&imm.imm64, 32);
group.legalize(
expand.legalize(
def!(a = ctz.I32(x)),
vec![
def!(c_thirty_two = iconst(imm64_32)),
@ -225,7 +285,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let imm64_1 = Literal::constant(&imm.imm64, 1);
let imm64_4 = Literal::constant(&imm.imm64, 4);
group.legalize(
expand.legalize(
def!(r = popcnt.I64(x)),
vec![
def!(qv3 = ushr_imm(x, imm64_1)),
@ -266,7 +326,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let lc0F = var("lc0F");
let lc01 = var("lc01");
group.legalize(
expand.legalize(
def!(r = popcnt.I32(x)),
vec![
def!(lv3 = ushr_imm(x, imm64_1)),
@ -289,31 +349,27 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
],
);
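The popcnt expansions are only partially visible in these hunks; assuming they follow the standard SWAR bit count that the opening `ushr_imm(x, 1)` step and the 1/4 immediates suggest, a standalone sketch looks like this:

fn popcnt64_swar(x: u64) -> u64 {
    // Classic pairwise bit count: 2-bit, 4-bit, then byte sums, folded
    // together with a multiply and a final shift by 56.
    let v = x - ((x >> 1) & 0x5555_5555_5555_5555);
    let v = (v & 0x3333_3333_3333_3333) + ((v >> 2) & 0x3333_3333_3333_3333);
    let v = (v + (v >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
    v.wrapping_mul(0x0101_0101_0101_0101) >> 56
}

fn main() {
    assert_eq!(popcnt64_swar(0), 0);
    assert_eq!(popcnt64_swar(0b1011), 3);
    assert_eq!(popcnt64_swar(u64::MAX), 64);
}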
group.custom_legalize(ineg, "convert_ineg");
group.custom_legalize(tls_value, "expand_tls_value");
group.build_and_add_to(&mut shared.transform_groups);
let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);
expand.custom_legalize(ineg, "convert_ineg");
expand.custom_legalize(tls_value, "expand_tls_value");
widen.custom_legalize(ineg, "convert_ineg");
widen.build_and_add_to(&mut shared.transform_groups);
// To reduce compilation times, separate out large blocks of legalizations by
// theme.
define_simd(shared, x86_instructions);
// To reduce compilation times, separate out large blocks of legalizations by theme.
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
expand.build_and_add_to(&mut shared.transform_groups);
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
narrow_avx
.chain_with(narrow_id)
.build_and_add_to(&mut shared.transform_groups);
widen.build_and_add_to(&mut shared.transform_groups);
}
fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
fn define_simd(
shared: &mut SharedDefinitions,
x86_instructions: &InstructionGroup,
narrow: &mut TransformGroupBuilder,
narrow_avx: &mut TransformGroupBuilder,
) {
let insts = &shared.instructions;
let band = insts.by_name("band");
let band_not = insts.by_name("band_not");
@ -330,6 +386,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let icmp = insts.by_name("icmp");
let imax = insts.by_name("imax");
let imin = insts.by_name("imin");
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
@ -349,6 +406,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true");
let vselect = insts.by_name("vselect");
let x86_packss = x86_instructions.by_name("x86_packss");
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
@ -364,16 +422,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let imm = &shared.imm;
let mut narrow = TransformGroupBuilder::new(
"x86_narrow",
r#"
Legalize instructions by narrowing.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
// Set up variables and immediates.
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
@ -430,7 +478,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
// Move into the lowest 16 bits of an XMM register.
def!(a = scalar_to_vector(x)),
// Insert the value again but in the next lowest 16 bits.
def!(b = insertlane(a, uimm8_one, x)),
def!(b = insertlane(a, x, uimm8_one)),
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
// Broadcast the bytes in the XMM register with PSHUFD.
@ -464,7 +512,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
// Move into the lowest 64 bits of an XMM register.
def!(a = scalar_to_vector(x)),
// Move into the highest 64 bits of the same XMM register.
def!(y = insertlane(a, uimm8_one, x)),
def!(y = insertlane(a, x, uimm8_one)),
],
);
}
@ -493,8 +541,8 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
);
}
// SIMD shift right (arithmetic)
for ty in &[I16, I32, I64] {
// SIMD shift right (arithmetic, i16x8 and i32x4)
for ty in &[I16, I32] {
let sshr = sshr.bind(vector(*ty, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
narrow.legalize(
@ -502,6 +550,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
);
}
// SIMD shift right (arithmetic, i8x16)
{
let sshr = sshr.bind(vector(I8, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
@ -526,6 +575,25 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
],
);
}
// SIMD shift right (arithmetic, i64x2)
{
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
let sshr_scalar_lane0 = sshr.bind(I64);
let sshr_scalar_lane1 = sshr.bind(I64);
narrow.legalize(
def!(z = sshr_vector(x, y)),
vec![
// Use scalar operations to shift the first lane.
def!(a = extractlane(x, uimm8_zero)),
def!(b = sshr_scalar_lane0(a, y)),
def!(c = insertlane(x, b, uimm8_zero)),
// Do the same for the second lane.
def!(d = extractlane(x, uimm8_one)),
def!(e = sshr_scalar_lane1(d, y)),
def!(z = insertlane(c, e, uimm8_one)),
],
);
}
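A standalone sketch of the lane-by-lane fallback above (illustrative): without a packed 64-bit arithmetic right shift below AVX-512, each lane is extracted, shifted with the scalar instruction, and re-inserted.

fn sshr_i64x2_via_lanes(x: [i64; 2], amt: u32) -> [i64; 2] {
    let shift = amt % 64; // scalar shifts mask the amount to the type width
    [x[0] >> shift, x[1] >> shift]
}

fn main() {
    assert_eq!(sshr_i64x2_via_lanes([-8, 8], 1), [-4, 4]);
    assert_eq!(sshr_i64x2_via_lanes([-1, i64::MAX], 63), [-1, 0]);
}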
// SIMD select
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
@ -540,6 +608,17 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
);
}
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
// This works because each lane of the boolean vector is filled with zeroes or ones.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vselect = vselect.bind(vector(ty, sse_vector_size));
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(d = vselect(c, x, y)),
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
);
}
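A standalone sketch of the bitselect fallback above (illustrative lane model): because every boolean lane is all ones or all zeroes, a plain bitwise select picks whole lanes, matching what the BLEND* encodings would have done.

fn bitselect_lane(mask: u64, if_true: u64, if_false: u64) -> u64 {
    (mask & if_true) | (!mask & if_false)
}

fn main() {
    assert_eq!(bitselect_lane(u64::MAX, 7, 9), 7); // all-ones lane selects the first value
    assert_eq!(bitselect_lane(0, 7, 9), 9); // all-zeroes lane selects the second
}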
// SIMD vany_true
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
@ -709,5 +788,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
narrow.custom_legalize(ushr, "convert_ushr");
narrow.custom_legalize(ishl, "convert_ishl");
narrow.build_and_add_to(&mut shared.transform_groups);
// This lives in the expand group to avoid conflicting with, e.g., i128 legalizations.
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
}

View file

@ -1,6 +1,6 @@
use crate::cdsl::cpu_modes::CpuMode;
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::types::ReferenceType;
use crate::cdsl::types::{ReferenceType, VectorType};
use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
@ -35,6 +35,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
x86_32.legalize_monomorphic(expand_flags);
@ -46,6 +47,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_64.legalize_monomorphic(expand_flags);
x86_64.legalize_default(x86_narrow);
@ -57,6 +59,7 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
let recipes = recipes::define(shared_defs, &settings, &regs);

View file

@ -54,6 +54,14 @@ pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
/// Bit scan reverse (stores index of first encountered 1 from the back).
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
/// Call near, relative, displacement relative to next instruction (sign-extended).
pub static CALL_RELATIVE: [u8; 1] = [0xe8];
@ -335,6 +343,10 @@ pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
/// in XMM0 and store the values into xmm1 (SSE4.1).
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
/// Compare packed data for equal (SSE2).
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
@ -459,7 +471,11 @@ pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];

View file

@ -427,6 +427,7 @@ pub(crate) fn define<'shared>(
let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
// Stack operand with a 32-bit signed displacement from either RBP or RSP.
let stack_gpr32 = Stack::new(gpr);
@ -607,12 +608,12 @@ pub(crate) fn define<'shared>(
// XX /r with FPR ins and outs. A form with a byte immediate.
{
recipes.add_template_inferred(
EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
.operands_in(vec![fpr, fpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
&*formats.insert_lane,
"lane",
&*formats.ternary_imm8,
"imm",
8,
0,
))
@ -620,7 +621,7 @@ pub(crate) fn define<'shared>(
r#"
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
let imm:i64 = lane.into();
let imm: i64 = imm.into();
sink.put1(imm as u8);
"#,
),
@ -904,14 +905,32 @@ pub(crate) fn define<'shared>(
.inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
);
// XX /r for BLEND* instructions
recipes.add_template_inferred(
EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
.operands_in(vec![
OperandConstraint::FixedReg(reg_xmm0),
OperandConstraint::RegClass(fpr),
OperandConstraint::RegClass(fpr),
])
.operands_out(vec![2])
.emit(
r#"
{{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
modrm_rr(in_reg1, in_reg2, sink);
"#,
),
"size_with_inferred_rex_for_inreg1_inreg2",
);
// XX /n ib with 8-bit immediate sign-extended.
{
recipes.add_template_inferred(
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
.operands_in(vec![gpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm,
&*formats.binary_imm64,
"imm",
8,
0,
@ -928,11 +947,11 @@ pub(crate) fn define<'shared>(
);
recipes.add_template_inferred(
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
.operands_in(vec![fpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm,
&*formats.binary_imm64,
"imm",
8,
0,
@ -951,11 +970,11 @@ pub(crate) fn define<'shared>(
// XX /n id with 32-bit immediate sign-extended.
recipes.add_template(
Template::new(
EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
.operands_in(vec![gpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm,
&*formats.binary_imm64,
"imm",
32,
0,
@ -977,20 +996,20 @@ pub(crate) fn define<'shared>(
// XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
{
recipes.add_template_inferred(
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
.operands_in(vec![fpr])
.operands_out(vec![fpr])
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
&*formats.extract_lane,
"lane",
&*formats.binary_imm8,
"imm",
8,
0,
)) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate?
))
.emit(
r#"
{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
let imm:i64 = lane.into();
let imm: i64 = imm.into();
sink.put1(imm as u8);
"#,
),
@ -1001,17 +1020,17 @@ pub(crate) fn define<'shared>(
// XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
{
recipes.add_template_inferred(
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
.operands_in(vec![fpr])
.operands_out(vec![gpr])
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
&*formats.extract_lane, "lane", 8, 0,
&*formats.binary_imm8, "imm", 8, 0,
))
.emit(
r#"
{{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
let imm:i64 = lane.into();
let imm: i64 = imm.into();
sink.put1(imm as u8);
"#,
), "size_with_inferred_rex_for_inreg0_outreg0"
@ -1021,12 +1040,12 @@ pub(crate) fn define<'shared>(
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
{
recipes.add_template_inferred(
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
.operands_in(vec![fpr, gpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
&*formats.insert_lane,
"lane",
&*formats.ternary_imm8,
"imm",
8,
0,
))
@ -1034,7 +1053,7 @@ pub(crate) fn define<'shared>(
r#"
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
let imm:i64 = lane.into();
let imm: i64 = imm.into();
sink.put1(imm as u8);
"#,
),
@ -1432,23 +1451,7 @@ pub(crate) fn define<'shared>(
// TODO Alternative forms for 8-bit immediates, when applicable.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
.operands_out(vec![gpr])
.emit(
r#"
let sp = StackRef::sp(stack_slot, &func.stack_slots);
let base = stk_base(sp.base);
{{PUT_OP}}(bits, rex2(out_reg0, base), sink);
modrm_sib_disp8(out_reg0, sink);
sib_noindex(base, sink);
let imm : i32 = offset.into();
sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
"#,
),
);
recipes.add_template_recipe(
EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
.operands_out(vec![gpr])
.emit(
r#"
@ -2871,12 +2874,12 @@ pub(crate) fn define<'shared>(
{
let has_small_offset =
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0);
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
// XX /n, MI form with imm8.
recipes.add_template(
Template::new(
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2)
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
.operands_in(vec![gpr])
.operands_out(vec![reg_rflags])
.inst_predicate(has_small_offset)
@ -2894,12 +2897,12 @@ pub(crate) fn define<'shared>(
);
let has_big_offset =
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0);
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
// XX /n, MI form with imm32.
recipes.add_template(
Template::new(
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5)
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
.operands_in(vec![gpr])
.operands_out(vec![reg_rflags])
.inst_predicate(has_big_offset)

View file

@ -3,6 +3,12 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
settings.add_bool(
"use_new_backend",
"Whether to use the new codegen backend using the new isel",
false,
);
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);

View file

@ -4,7 +4,7 @@ use std::rc::Rc;
pub(crate) struct Formats {
pub(crate) binary: Rc<InstructionFormat>,
pub(crate) binary_imm: Rc<InstructionFormat>,
pub(crate) binary_imm64: Rc<InstructionFormat>,
pub(crate) branch: Rc<InstructionFormat>,
pub(crate) branch_float: Rc<InstructionFormat>,
pub(crate) branch_icmp: Rc<InstructionFormat>,
@ -17,14 +17,13 @@ pub(crate) struct Formats {
pub(crate) cond_trap: Rc<InstructionFormat>,
pub(crate) copy_special: Rc<InstructionFormat>,
pub(crate) copy_to_ssa: Rc<InstructionFormat>,
pub(crate) extract_lane: Rc<InstructionFormat>,
pub(crate) binary_imm8: Rc<InstructionFormat>,
pub(crate) float_compare: Rc<InstructionFormat>,
pub(crate) float_cond: Rc<InstructionFormat>,
pub(crate) float_cond_trap: Rc<InstructionFormat>,
pub(crate) func_addr: Rc<InstructionFormat>,
pub(crate) heap_addr: Rc<InstructionFormat>,
pub(crate) indirect_jump: Rc<InstructionFormat>,
pub(crate) insert_lane: Rc<InstructionFormat>,
pub(crate) int_compare: Rc<InstructionFormat>,
pub(crate) int_compare_imm: Rc<InstructionFormat>,
pub(crate) int_cond: Rc<InstructionFormat>,
@ -45,6 +44,7 @@ pub(crate) struct Formats {
pub(crate) store_complex: Rc<InstructionFormat>,
pub(crate) table_addr: Rc<InstructionFormat>,
pub(crate) ternary: Rc<InstructionFormat>,
pub(crate) ternary_imm8: Rc<InstructionFormat>,
pub(crate) trap: Rc<InstructionFormat>,
pub(crate) unary: Rc<InstructionFormat>,
pub(crate) unary_bool: Rc<InstructionFormat>,
@ -76,7 +76,9 @@ impl Formats {
binary: Builder::new("Binary").value().value().build(),
binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(),
binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),
binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
// The select instructions are controlled by the second VALUE operand.
// The first VALUE operand is the controlling flag which has a derived type.
@ -88,23 +90,18 @@ impl Formats {
.typevar_operand(1)
.build(),
ternary_imm8: Builder::new("TernaryImm8")
.value()
.imm(&imm.uimm8)
.value()
.build(),
// Catch-all for instructions with many outputs and inputs and no immediate
// operands.
multiary: Builder::new("MultiAry").varargs().build(),
nullary: Builder::new("NullAry").build(),
insert_lane: Builder::new("InsertLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.value()
.build(),
extract_lane: Builder::new("ExtractLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.build(),
shuffle: Builder::new("Shuffle")
.value()
.value()


@ -559,9 +559,9 @@ fn define_simd_lane_access(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.insert_lane,
&formats.ternary_imm8,
)
.operands_in(vec![x, Idx, y])
.operands_in(vec![x, y, Idx])
.operands_out(vec![a]),
);
@ -579,7 +579,7 @@ fn define_simd_lane_access(
may or may not be zeroed depending on the ISA but the type system should prevent using
``a`` as anything other than the extracted value.
"#,
&formats.extract_lane,
&formats.binary_imm8,
)
.operands_in(vec![x, Idx])
.operands_out(vec![a]),
@ -1172,6 +1172,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"uload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push(
Inst::new(
"sload8x8",
@ -1186,6 +1200,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"sload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let I32x4 = &TypeVar::new(
"I32x4",
"A SIMD vector with exactly 4 lanes of 32-bit values",
@ -1201,7 +1229,7 @@ pub(crate) fn define(
Inst::new(
"uload16x4",
r#"
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
vector.
"#,
&formats.load,
@ -1211,6 +1239,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"uload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push(
Inst::new(
"sload16x4",
@ -1225,6 +1267,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"sload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector with exactly 2 lanes of 64-bit values",
@ -1250,6 +1306,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"uload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push(
Inst::new(
"sload32x2",
@ -1264,6 +1334,20 @@ pub(crate) fn define(
.can_load(true),
);
ig.push(
Inst::new(
"sload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
let a = &Operand::new("a", Mem).with_doc("Value loaded");
let Offset =
@ -2131,7 +2215,7 @@ pub(crate) fn define(
Like `icmp_imm`, but returns integer CPU flags instead of testing
a specific condition code.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![f]),
@ -2181,7 +2265,7 @@ pub(crate) fn define(
This is similar to `iadd` but the operands are interpreted as signed integers and their
summed result, instead of wrapping, will be saturated to the lowest or highest
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
since an `iadd_ssat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
clamped to `0x7F`.
"#,
&formats.binary,
@ -2376,7 +2460,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2391,7 +2475,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2405,7 +2489,7 @@ pub(crate) fn define(
This operation traps if the divisor is zero.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2421,7 +2505,7 @@ pub(crate) fn define(
representable in `B` bits two's complement. This only happens
when `x = -2^{B-1}, Y = -1`.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2435,7 +2519,7 @@ pub(crate) fn define(
This operation traps if the divisor is zero.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2449,7 +2533,7 @@ pub(crate) fn define(
This operation traps if the divisor is zero.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2468,7 +2552,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2868,7 +2952,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2885,7 +2969,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2902,7 +2986,7 @@ pub(crate) fn define(
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2947,7 +3031,7 @@ pub(crate) fn define(
r#"
Rotate left by immediate.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -2959,7 +3043,7 @@ pub(crate) fn define(
r#"
Rotate right by immediate.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -3034,7 +3118,7 @@ pub(crate) fn define(
The shift amount is masked to the size of ``x``.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -3048,7 +3132,7 @@ pub(crate) fn define(
The shift amount is masked to the size of the register.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),
@ -3062,7 +3146,7 @@ pub(crate) fn define(
The shift amount is masked to the size of the register.
"#,
&formats.binary_imm,
&formats.binary_imm64,
)
.operands_in(vec![x, Y])
.operands_out(vec![a]),


@ -61,6 +61,7 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
let cls = insts.by_name("cls");
let clz = insts.by_name("clz");
let ctz = insts.by_name("ctz");
let copy = insts.by_name("copy");
let fabs = insts.by_name("fabs");
let f32const = insts.by_name("f32const");
let f64const = insts.by_name("f64const");
@ -198,8 +199,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
let ah = var("ah");
let cc = var("cc");
let block = var("block");
let block1 = var("block1");
let block2 = var("block2");
let ptr = var("ptr");
let flags = var("flags");
let offset = var("off");
@ -212,8 +211,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
// embedded as part of arguments), so use a custom legalization for now.
narrow.custom_legalize(iconst, "narrow_iconst");
{
let inst = uextend.bind(I128).bind(I64);
for &(ty, ty_half) in &[(I128, I64), (I64, I32)] {
let inst = uextend.bind(ty).bind(ty_half);
narrow.legalize(
def!(a = inst(x)),
vec![
@ -223,12 +222,12 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
);
}
{
let inst = sextend.bind(I128).bind(I64);
for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] {
let inst = sextend.bind(ty).bind(ty_half);
narrow.legalize(
def!(a = inst(x)),
vec![
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, 63))), // splat sign bit to whole number
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number
def!(a = iconcat(x, ah)),
],
);
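// Illustrative sketch, not part of this diff: the scalar arithmetic behind the
// `sextend` narrowing above. The upper half of a sign-extended value is just the
// sign bit replicated across the half-width, i.e. an arithmetic shift right by
// (half_width - 1): 63 for i128 from i64, 31 for i64 from i32.
fn sextend_i32_to_i64_halves(x: i32) -> (u32, u32) {
    let lo = x as u32; // low half keeps the original bits
    let hi = (x >> 31) as u32; // all ones if negative, all zeros otherwise
    (lo, hi)
}
// e.g. sextend_i32_to_i64_halves(-2) == (0xFFFF_FFFE, 0xFFFF_FFFF)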
@ -268,39 +267,45 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
],
);
narrow.legalize(
def!(brz.I128(x, block, vararg)),
vec![
def!((xl, xh) = isplit(x)),
def!(
a = icmp_imm(
Literal::enumerator_for(&imm.intcc, "eq"),
xl,
Literal::constant(&imm.imm64, 0)
)
),
def!(
b = icmp_imm(
Literal::enumerator_for(&imm.intcc, "eq"),
xh,
Literal::constant(&imm.imm64, 0)
)
),
def!(c = band(a, b)),
def!(brnz(c, block, vararg)),
],
);
for &ty in &[I128, I64] {
let block = var("block");
let block1 = var("block1");
let block2 = var("block2");
narrow.legalize(
def!(brnz.I128(x, block1, vararg)),
vec![
def!((xl, xh) = isplit(x)),
def!(brnz(xl, block1, vararg)),
def!(jump(block2, Literal::empty_vararg())),
block!(block2),
def!(brnz(xh, block1, vararg)),
],
);
narrow.legalize(
def!(brz.ty(x, block, vararg)),
vec![
def!((xl, xh) = isplit(x)),
def!(
a = icmp_imm(
Literal::enumerator_for(&imm.intcc, "eq"),
xl,
Literal::constant(&imm.imm64, 0)
)
),
def!(
b = icmp_imm(
Literal::enumerator_for(&imm.intcc, "eq"),
xh,
Literal::constant(&imm.imm64, 0)
)
),
def!(c = band(a, b)),
def!(brnz(c, block, vararg)),
],
);
narrow.legalize(
def!(brnz.ty(x, block1, vararg)),
vec![
def!((xl, xh) = isplit(x)),
def!(brnz(xl, block1, vararg)),
def!(jump(block2, Literal::empty_vararg())),
block!(block2),
def!(brnz(xh, block1, vararg)),
],
);
}
narrow.legalize(
def!(a = popcnt.I128(x)),
@ -629,6 +634,14 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
);
}
for &(ty_half, ty) in &[(I64, I128), (I32, I64)] {
let inst = ireduce.bind(ty_half).bind(ty);
expand.legalize(
def!(a = inst(x)),
vec![def!((b, c) = isplit(x)), def!(a = copy(b))],
);
}
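// Illustrative sketch, not part of this diff: `ireduce` to the half-width type
// simply keeps the low half, which is what the `isplit` + `copy` expansion above
// produces (the high half bound to `c` is discarded).
fn ireduce_i64_to_i32(x: u64) -> u32 {
    (x & 0xFFFF_FFFF) as u32
}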
// Expand integer operations with carry for RISC architectures that don't have
// the flags.
let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult");


@ -25,11 +25,14 @@ pub(crate) fn define() -> SettingGroup {
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
vec![
"backtracking",
"backtracking_checked",
"experimental_linear_scan",
"experimental_linear_scan_checked",
],
);
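// Illustrative sketch, not part of this diff: selecting one of the register
// allocator algorithms listed above through the generated settings API, assuming
// the enum above is registered under the name "regalloc" in the shared settings
// group and the usual cranelift_codegen::settings interface.
use cranelift_codegen::settings::{self, Configurable};

fn flags_with_checked_linear_scan() -> settings::Flags {
    let mut builder = settings::builder();
    // Accepted values: "backtracking", "backtracking_checked",
    // "experimental_linear_scan", "experimental_linear_scan_checked".
    builder
        .set("regalloc", "experimental_linear_scan_checked")
        .expect("unknown setting or value");
    settings::Flags::new(builder)
}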


@ -1 +1 @@
{"files":{"Cargo.toml":"702a281a26cf7099e1b3ca5e8bea145c113f52242be4f1e7e5b06bf129092599","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
{"files":{"Cargo.toml":"d3026bf5426d767b0b23f0a4f6272aaeb68f598a92f6c788c1f6948153fa63c3","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}


@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.63.0"
version = "0.64.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"

File diff hidden because one or more lines are too long.

third_party/rust/cranelift-codegen/Cargo.toml (vendored)

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.63.0"
version = "0.64.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@ -13,25 +13,27 @@ build = "build.rs"
edition = "2018"
[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.63.0" }
cranelift-entity = { path = "../entity", version = "0.63.0" }
cranelift-bforest = { path = "../bforest", version = "0.63.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.64.0" }
cranelift-entity = { path = "../entity", version = "0.64.0" }
cranelift-bforest = { path = "../bforest", version = "0.64.0" }
hashbrown = { version = "0.7", optional = true }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
gimli = { version = "0.20.0", default-features = false, features = ["write"], optional = true }
gimli = { version = "0.21.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.0.0" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
regalloc = "0.0.21"
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.1.0" }
regalloc = "0.0.25"
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accommodated in `tests`.
[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.63.0" }
cranelift-codegen-meta = { path = "meta", version = "0.64.0" }
peepmatic = { path = "../peepmatic", optional = true, version = "0.64.0" }
[features]
default = ["std", "unwind"]
@ -58,10 +60,12 @@ x86 = []
arm32 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
# Option to enable all architectures.
all-arch = [
"x86",
"x64",
"arm32",
"arm64",
"riscv"
@ -70,5 +74,12 @@ all-arch = [
# For dependent crates that want to serialize some parts of cranelift
enable-serde = ["serde"]
# Recompile our optimizations that are written in the `peepmatic` DSL into a
# compact finite-state transducer automaton.
rebuild-peephole-optimizers = ["peepmatic"]
# Enable the use of `peepmatic`-generated peephole optimizers.
enable-peepmatic = ["peepmatic-runtime"]
[badges]
maintenance = { status = "experimental" }

third_party/rust/cranelift-codegen/build.rs (vendored)

@ -71,4 +71,22 @@ fn main() {
);
println!("cargo:warning=Generated files are in {}", out_dir);
}
#[cfg(feature = "rebuild-peephole-optimizers")]
rebuild_peephole_optimizers();
}
#[cfg(feature = "rebuild-peephole-optimizers")]
fn rebuild_peephole_optimizers() {
use std::path::Path;
let source_path = Path::new("src").join("preopt.peepmatic");
println!("cargo:rerun-if-changed={}", source_path.display());
let preopt =
peepmatic::compile_file(&source_path).expect("failed to compile `src/preopt.peepmatic`");
preopt
.serialize_to_file(&Path::new("src").join("preopt.serialized"))
.expect("failed to serialize peephole optimizer to `src/preopt.serialized`");
}

third_party/rust/cranelift-codegen/src/abi.rs (vendored)

@ -54,6 +54,9 @@ pub enum ValueConversion {
/// Unsigned zero-extend value to the required type.
Uext(Type),
/// Pass value by pointer of given integer type.
Pointer(Type),
}
impl ValueConversion {
@ -63,7 +66,7 @@ impl ValueConversion {
Self::IntSplit => ty.half_width().expect("Integer type too small to split"),
Self::VectorSplit => ty.half_vector().expect("Not a vector"),
Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
Self::Sext(nty) | Self::Uext(nty) => nty,
Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty,
}
}
@ -74,6 +77,11 @@ impl ValueConversion {
_ => false,
}
}
/// Is this a conversion to pointer?
pub fn is_pointer(self) -> bool {
matches!(self, Self::Pointer(_))
}
}
/// Common trait for assigning arguments to registers or stack locations.
@ -110,10 +118,16 @@ pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<
}
// Split this argument into two smaller ones. Then revisit both.
ArgAction::Convert(conv) => {
debug_assert!(
!arg.legalized_to_pointer,
"No more conversions allowed after conversion to pointer"
);
let value_type = conv.apply(arg.value_type);
let new_arg = AbiParam { value_type, ..arg };
args.to_mut()[argno].value_type = value_type;
if conv.is_split() {
if conv.is_pointer() {
args.to_mut()[argno].legalized_to_pointer = true;
} else if conv.is_split() {
let new_arg = AbiParam { value_type, ..arg };
args.to_mut().insert(argno + 1, new_arg);
}
}
@ -152,6 +166,10 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
let have_bits = have.bits();
let arg_bits = arg.value_type.bits();
if arg.legalized_to_pointer {
return ValueConversion::Pointer(arg.value_type);
}
match have_bits.cmp(&arg_bits) {
// We have fewer bits than the ABI argument.
Ordering::Less => {
@ -226,5 +244,12 @@ mod tests {
legalize_abi_value(types::F64, &arg),
ValueConversion::IntBits
);
// Value is passed by reference
arg.legalized_to_pointer = true;
assert_eq!(
legalize_abi_value(types::F64, &arg),
ValueConversion::Pointer(types::I32)
);
}
}


@ -15,7 +15,8 @@ const NUM_BITS: usize = core::mem::size_of::<Num>() * 8;
/// The first value in the bitmap is of the lowest addressed slot on the stack.
/// As all stacks in Isa's supported by Cranelift grow down, this means that
/// first value is of the top of the stack and values proceed down the stack.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Stackmap {
bitmap: Vec<BitSet<Num>>,
mapped_words: u32,


@ -5,12 +5,14 @@
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
use core::convert::{From, Into};
use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BitSet<T>(pub T);
impl<T> BitSet<T>


@ -27,6 +27,7 @@ use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
use crate::regalloc;
use crate::remove_constant_phis::do_remove_constant_phis;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
use crate::simple_gvn::do_simple_gvn;
@ -179,6 +180,8 @@ impl Context {
self.dce(isa)?;
}
self.remove_constant_phis(isa)?;
if let Some(backend) = isa.get_mach_backend() {
let result = backend.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
@ -224,7 +227,7 @@ impl Context {
let _tt = timing::binemit();
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
if let Some(ref result) = &self.mach_compile_result {
result.sections.emit(&mut sink);
result.buffer.emit(&mut sink);
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
}
@ -292,6 +295,16 @@ impl Context {
Ok(())
}
/// Perform constant-phi removal on the function.
pub fn remove_constant_phis<'a, FOI: Into<FlagsOrIsa<'a>>>(
&mut self,
fisa: FOI,
) -> CodegenResult<()> {
do_remove_constant_phis(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
do_preopt(&mut self.func, &mut self.cfg, isa);


@ -794,15 +794,20 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
// during lowering.
if self.isa.get_mach_backend().is_none() {
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
}
&mut self.func.dfg


@ -40,3 +40,24 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let opcode = data.opcode();
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}
/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load?
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
}
/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
/// represented in 64 bits?
pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
let data = &func.dfg[inst];
if data.opcode() == Opcode::Null {
return Some(0);
}
match data {
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
_ => None,
}
}


@ -234,11 +234,7 @@ impl DataFlowGraph {
/// Get the type of a value.
pub fn value_type(&self, v: Value) -> Type {
match self.values[v] {
ValueData::Inst { ty, .. }
| ValueData::Param { ty, .. }
| ValueData::Alias { ty, .. } => ty,
}
self.values[v].ty()
}
/// Get the definition of a value.
@ -383,9 +379,14 @@ pub enum ValueDef {
impl ValueDef {
/// Unwrap the instruction where the value was defined, or panic.
pub fn unwrap_inst(&self) -> Inst {
self.inst().expect("Value is not an instruction result")
}
/// Get the instruction where the value was defined, if any.
pub fn inst(&self) -> Option<Inst> {
match *self {
Self::Result(inst, _) => inst,
_ => panic!("Value is not an instruction result"),
Self::Result(inst, _) => Some(inst),
_ => None,
}
}
@ -428,6 +429,16 @@ enum ValueData {
Alias { ty: Type, original: Value },
}
impl ValueData {
fn ty(&self) -> Type {
match *self {
ValueData::Inst { ty, .. }
| ValueData::Param { ty, .. }
| ValueData::Alias { ty, .. } => ty,
}
}
}
/// Instructions.
///
impl DataFlowGraph {


@ -7,6 +7,7 @@
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
use crate::isa::{CallConv, RegInfo, RegUnit};
use crate::machinst::RelocDistance;
use alloc::vec::Vec;
use core::fmt;
use core::str::FromStr;
@ -155,6 +156,8 @@ pub struct AbiParam {
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
/// been legalized.
pub location: ArgumentLoc,
/// Was the argument converted to pointer during legalization?
pub legalized_to_pointer: bool,
}
impl AbiParam {
@ -165,6 +168,7 @@ impl AbiParam {
extension: ArgumentExtension::None,
purpose: ArgumentPurpose::Normal,
location: Default::default(),
legalized_to_pointer: false,
}
}
@ -175,6 +179,7 @@ impl AbiParam {
extension: ArgumentExtension::None,
purpose,
location: Default::default(),
legalized_to_pointer: false,
}
}
@ -185,6 +190,7 @@ impl AbiParam {
extension: ArgumentExtension::None,
purpose,
location: ArgumentLoc::Reg(regunit),
legalized_to_pointer: false,
}
}
@ -218,6 +224,9 @@ pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAbiParam<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0.value_type)?;
if self.0.legalized_to_pointer {
write!(f, " ptr")?;
}
match self.0.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => write!(f, " uext")?,
@ -366,6 +375,16 @@ pub struct ExtFuncData {
/// Will this function be defined nearby, such that it will always be a certain distance away,
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
/// symbols meant to be preemptible cannot be considered colocated.
///
/// If `true`, some backends may use relocation forms that have limited range. The exact
/// distance depends on the code model in use. Currently on AArch64, for example, Cranelift
/// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how
/// far away the target will be, it is best not to set the `colocated` flag; in general, this
/// flag is best used when the target is known to be in the same unit of code generation, such
/// as a Wasm module.
///
/// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A
/// `colocated` flag value of `true` implies `RelocDistance::Near`.
pub colocated: bool,
}
@ -378,6 +397,17 @@ impl fmt::Display for ExtFuncData {
}
}
impl ExtFuncData {
/// Return an estimate of the distance to the referred-to function symbol.
pub fn reloc_distance(&self) -> RelocDistance {
if self.colocated {
RelocDistance::Near
} else {
RelocDistance::Far
}
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -393,6 +423,8 @@ mod tests {
assert_eq!(t.sext().to_string(), "i32 sext");
t.purpose = ArgumentPurpose::StructReturn;
assert_eq!(t.to_string(), "i32 uext sret");
t.legalized_to_pointer = true;
assert_eq!(t.to_string(), "i32 ptr uext sret");
}
#[test]


@ -308,6 +308,30 @@ impl Function {
// function, assume it is not a leaf.
self.dfg.signatures.is_empty()
}
/// Replace the `dst` instruction's data with the `src` instruction's data
/// and then remove `src`.
///
/// `src` and its result values should not be used at all, as any uses would
/// be left dangling after calling this method.
///
/// `src` and `dst` must have the same number of resulting values, and
/// `src`'s i^th value must have the same type as `dst`'s i^th value.
pub fn transplant_inst(&mut self, dst: Inst, src: Inst) {
debug_assert_eq!(
self.dfg.inst_results(dst).len(),
self.dfg.inst_results(src).len()
);
debug_assert!(self
.dfg
.inst_results(dst)
.iter()
.zip(self.dfg.inst_results(src))
.all(|(a, b)| self.dfg.value_type(*a) == self.dfg.value_type(*b)));
self.dfg[dst] = self.dfg[src].clone();
self.layout.remove_inst(src);
}
}
/// Additional annotations for function display.


@ -3,6 +3,7 @@
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::{ExternalName, GlobalValue, Type};
use crate::isa::TargetIsa;
use crate::machinst::RelocDistance;
use core::fmt;
/// Information about a global value declaration.
@ -62,6 +63,10 @@ pub enum GlobalValueData {
/// Will this symbol be defined nearby, such that it will always be a certain distance
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
/// symbols meant to be preemptible cannot be colocated.
///
/// If `true`, some backends may use relocation forms that have limited range: for example,
/// a +/- 2^27-byte range on AArch64. See the documentation for
/// [`RelocDistance`](machinst::RelocDistance) for more details.
colocated: bool,
/// Does this symbol refer to a thread local storage value?
@ -85,6 +90,20 @@ impl GlobalValueData {
Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type,
}
}
/// If this global references a symbol, return an estimate of the relocation distance,
/// based on the `colocated` flag.
pub fn maybe_reloc_distance(&self) -> Option<RelocDistance> {
match self {
&GlobalValueData::Symbol {
colocated: true, ..
} => Some(RelocDistance::Near),
&GlobalValueData::Symbol {
colocated: false, ..
} => Some(RelocDistance::Far),
_ => None,
}
}
}
impl fmt::Display for GlobalValueData {


@ -62,6 +62,21 @@ impl Imm64 {
pub fn bits(&self) -> i64 {
self.0
}
/// Sign extend this immediate as if it were a signed integer of the given
/// power-of-two width.
pub fn sign_extend_from_width(&mut self, bit_width: u16) {
debug_assert!(bit_width.is_power_of_two());
if bit_width >= 64 {
return;
}
let bit_width = bit_width as i64;
let delta = 64 - bit_width;
let sign_extended = (self.0 << delta) >> delta;
*self = Imm64(sign_extended);
}
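// Illustrative sketch, not part of this diff: the shift trick used by
// `sign_extend_from_width` above. Shifting left and then arithmetically right by
// (64 - bit_width) replicates the sign bit of the narrow value through the upper
// bits of the i64.
fn sign_extend(bits: i64, bit_width: u16) -> i64 {
    let delta = 64 - i64::from(bit_width);
    (bits << delta) >> delta
}
// e.g. sign_extend(0xFF, 8) == -1, while sign_extend(0x7F, 8) == 127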
}
impl Into<i64> for Imm64 {


@ -11,9 +11,7 @@ use core::fmt::{self, Display, Formatter};
use core::ops::{Deref, DerefMut};
use core::str::FromStr;
use crate::ir;
use crate::ir::types;
use crate::ir::{Block, FuncRef, JumpTable, SigRef, Type, Value};
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
use crate::isa;
use crate::bitset::BitSet;
@ -257,6 +255,30 @@ impl InstructionData {
}
}
/// If this is a trapping instruction, get its trap code. Otherwise, return
/// `None`.
pub fn trap_code(&self) -> Option<TrapCode> {
match *self {
Self::CondTrap { code, .. }
| Self::FloatCondTrap { code, .. }
| Self::IntCondTrap { code, .. }
| Self::Trap { code, .. } => Some(code),
_ => None,
}
}
/// If this is a trapping instruction, get an exclusive reference to its
/// trap code. Otherwise, return `None`.
pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> {
match self {
Self::CondTrap { code, .. }
| Self::FloatCondTrap { code, .. }
| Self::IntCondTrap { code, .. }
| Self::Trap { code, .. } => Some(code),
_ => None,
}
}
/// Return information about a call instruction.
///
/// Any instruction that can call another function reveals its call signature here.
@ -274,6 +296,39 @@ impl InstructionData {
}
}
}
#[inline]
pub(crate) fn sign_extend_immediates(&mut self, ctrl_typevar: Type) {
if ctrl_typevar.is_invalid() {
return;
}
let bit_width = ctrl_typevar.bits();
match self {
Self::BinaryImm64 {
opcode,
arg: _,
imm,
} => {
if matches!(opcode, Opcode::SdivImm | Opcode::SremImm) {
imm.sign_extend_from_width(bit_width);
}
}
Self::IntCompareImm {
opcode,
arg: _,
cond,
imm,
} => {
debug_assert_eq!(*opcode, Opcode::IcmpImm);
if cond.unsigned() != *cond {
imm.sign_extend_from_width(bit_width);
}
}
_ => {}
}
}
}
/// Information about branch and jump instructions.


@ -24,6 +24,20 @@ pub enum LibCall {
/// probe for stack overflow. These are emitted for functions which need it
/// when the `enable_probestack` setting is true.
Probestack,
/// udiv.i64
UdivI64,
/// sdiv.i64
SdivI64,
/// urem.i64
UremI64,
/// srem.i64
SremI64,
/// ishl.i64
IshlI64,
/// ushr.i64
UshrI64,
/// sshr.i64
SshrI64,
/// ceil.f32
CeilF32,
/// ceil.f64
@ -63,6 +77,13 @@ impl FromStr for LibCall {
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"Probestack" => Ok(Self::Probestack),
"UdivI64" => Ok(Self::UdivI64),
"SdivI64" => Ok(Self::SdivI64),
"UremI64" => Ok(Self::UremI64),
"SremI64" => Ok(Self::SremI64),
"IshlI64" => Ok(Self::IshlI64),
"UshrI64" => Ok(Self::UshrI64),
"SshrI64" => Ok(Self::SshrI64),
"CeilF32" => Ok(Self::CeilF32),
"CeilF64" => Ok(Self::CeilF64),
"FloorF32" => Ok(Self::FloorF32),
@ -88,6 +109,16 @@ impl LibCall {
/// Returns `None` if no well-known library routine name exists for that instruction.
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
Some(match ctrl_type {
types::I64 => match opcode {
Opcode::Udiv => Self::UdivI64,
Opcode::Sdiv => Self::SdivI64,
Opcode::Urem => Self::UremI64,
Opcode::Srem => Self::SremI64,
Opcode::Ishl => Self::IshlI64,
Opcode::Ushr => Self::UshrI64,
Opcode::Sshr => Self::SshrI64,
_ => return None,
},
types::F32 => match opcode {
Opcode::Ceil => Self::CeilF32,
Opcode::Floor => Self::FloorF32,
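// Illustrative sketch, not part of this diff: querying the new 64-bit integer
// libcall mappings added above, e.g. from a legalizer deciding whether a scalar
// 64-bit division must be lowered to a runtime call on a 32-bit target.
use cranelift_codegen::ir::{types, LibCall, Opcode};

fn udiv_i64_needs_libcall() -> bool {
    matches!(
        LibCall::for_inst(Opcode::Udiv, types::I64),
        Some(LibCall::UdivI64)
    )
}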


@ -27,9 +27,6 @@ pub enum TrapCode {
/// A `table_addr` instruction detected an out-of-bounds error.
TableOutOfBounds,
/// Other bounds checking error.
OutOfBounds,
/// Indirect call to a null table entry.
IndirectCallToNull,
@ -63,7 +60,6 @@ impl Display for TrapCode {
StackOverflow => "stk_ovf",
HeapOutOfBounds => "heap_oob",
TableOutOfBounds => "table_oob",
OutOfBounds => "oob",
IndirectCallToNull => "icall_null",
BadSignature => "bad_sig",
IntegerOverflow => "int_ovf",
@ -86,7 +82,6 @@ impl FromStr for TrapCode {
"stk_ovf" => Ok(StackOverflow),
"heap_oob" => Ok(HeapOutOfBounds),
"table_oob" => Ok(TableOutOfBounds),
"oob" => Ok(OutOfBounds),
"icall_null" => Ok(IndirectCallToNull),
"bad_sig" => Ok(BadSignature),
"int_ovf" => Ok(IntegerOverflow),
@ -106,11 +101,10 @@ mod tests {
use alloc::string::ToString;
// Everything but user-defined codes.
const CODES: [TrapCode; 11] = [
const CODES: [TrapCode; 10] = [
TrapCode::StackOverflow,
TrapCode::HeapOutOfBounds,
TrapCode::TableOutOfBounds,
TrapCode::OutOfBounds,
TrapCode::IndirectCallToNull,
TrapCode::BadSignature,
TrapCode::IntegerOverflow,

File diff not shown because of its large size.


@ -3,14 +3,14 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::binemit::CodeOffset;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
use crate::machinst::MachLabel;
use regalloc::{RealRegUniverse, Reg, Writable};
use core::convert::{Into, TryFrom};
use core::convert::Into;
use std::string::String;
/// A shift operator for a register or immediate.
@ -112,7 +112,9 @@ pub enum MemLabel {
/// A memory argument to load/store, encapsulating the possible addressing modes.
#[derive(Clone, Debug)]
pub enum MemArg {
Label(MemLabel),
//
// Real ARM64 addressing modes:
//
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
PostIndexed(Writable<Reg>, SImm9),
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
@ -137,11 +139,35 @@ pub enum MemArg {
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
UnsignedOffset(Reg, UImm12Scaled),
/// Offset from the stack pointer. Lowered into a real amode at emission.
SPOffset(i64),
//
// virtual addressing modes that are lowered at emission time:
//
/// Reference to a "label": e.g., a symbol.
Label(MemLabel),
/// Offset from the frame pointer. Lowered into a real amode at emission.
FPOffset(i64),
/// Arbitrary offset from a register. Converted to generation of large
/// offsets with multiple instructions as necessary during code emission.
RegOffset(Reg, i64, Type),
/// Offset from the stack pointer.
SPOffset(i64, Type),
/// Offset from the frame pointer.
FPOffset(i64, Type),
/// Offset from the "nominal stack pointer", which is where the real SP is
/// just after stack and spill slots are allocated in the function prologue.
/// At emission time, this is converted to `SPOffset` with a fixup added to
/// the offset constant. The fixup is a running value that is tracked as
/// emission iterates through instructions in linear order, and can be
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
///
/// The standard ABI is in charge of handling this (by emitting the
/// adjustment meta-instructions). It maintains the invariant that "nominal
/// SP" is where the actual SP is after the function prologue and before
/// clobber pushes. See the diagram in the documentation for
/// [crate::isa::aarch64::abi](the ABI module) for more details.
NominalSPOffset(i64, Type),
}
impl MemArg {
@ -152,17 +178,6 @@ impl MemArg {
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
}
/// Memory reference using an address in a register and an offset, if possible.
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
Some(MemArg::Unscaled(reg, simm9))
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
Some(MemArg::UnsignedOffset(reg, uimm12s))
} else {
None
}
}
/// Memory reference using the sum of two registers as an address.
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
MemArg::RegReg(reg1, reg2)
@ -281,78 +296,44 @@ impl CondBrKind {
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BranchTarget {
/// An unresolved reference to a BlockIndex, as passed into
/// An unresolved reference to a Label, as passed into
/// `lower_branch_group()`.
Block(BlockIndex),
/// A resolved reference to another instruction, after
/// `Inst::with_block_offsets()`.
ResolvedOffset(isize),
Label(MachLabel),
/// A fixed PC offset.
ResolvedOffset(i32),
}
impl BranchTarget {
/// Lower the branch target given offsets of each block.
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
/// Return the target's label, if it is a label-based target.
pub fn as_label(self) -> Option<MachLabel> {
match self {
&mut BranchTarget::Block(bix) => {
let bix = usize::try_from(bix).unwrap();
assert!(bix < targets.len());
let block_offset_in_func = targets[bix];
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
*self = BranchTarget::ResolvedOffset(branch_offset);
}
&mut BranchTarget::ResolvedOffset(..) => {}
}
}
/// Get the block index.
pub fn as_block_index(&self) -> Option<BlockIndex> {
match self {
&BranchTarget::Block(bix) => Some(bix),
BranchTarget::Label(l) => Some(l),
_ => None,
}
}
/// Get the offset as 4-byte words. Returns `0` if not
/// yet resolved (in that case, we're only computing
/// size and the offset doesn't matter).
pub fn as_offset_words(&self) -> isize {
match self {
&BranchTarget::ResolvedOffset(off) => off >> 2,
/// Return the target's offset, if specified, or zero if label-based.
pub fn as_offset19_or_zero(self) -> u32 {
let off = match self {
BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
}
};
assert!(off <= 0x3ffff);
assert!(off >= -0x40000);
(off as u32) & 0x7ffff
}
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
pub fn as_off26(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 25)) && (off >= -(1 << 25)) {
Some((off as u32) & ((1 << 26) - 1))
} else {
None
}
}
/// Get the offset as a 19-bit offset, or `None` if overflow.
pub fn as_off19(&self) -> Option<u32> {
let off = self.as_offset_words();
if (off < (1 << 18)) && (off >= -(1 << 18)) {
Some((off as u32) & ((1 << 19) - 1))
} else {
None
}
}
/// Map the block index given a transform map.
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
match self {
&mut BranchTarget::Block(ref mut bix) => {
let n = block_index_map[usize::try_from(*bix).unwrap()];
*bix = n;
}
&mut BranchTarget::ResolvedOffset(_) => {}
}
/// Return the target's offset, if specified, or zero if label-based.
pub fn as_offset26_or_zero(self) -> u32 {
let off = match self {
BranchTarget::ResolvedOffset(off) => off >> 2,
_ => 0,
};
assert!(off <= 0x1ffffff);
assert!(off >= -0x2000000);
(off as u32) & 0x3ffffff
}
}
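// Illustrative sketch, not part of this diff: the arithmetic behind
// `as_offset19_or_zero` above. Byte offsets are expressed in 4-byte words and
// truncated to a 19-bit two's-complement field for the conditional-branch forms.
fn encode_off19(byte_off: i32) -> u32 {
    let words = byte_off >> 2;
    assert!((-0x40000..=0x3ffff).contains(&words));
    (words as u32) & 0x7ffff
}
// e.g. encode_off19(-8) == 0x7fffe (minus two words in 19-bit two's complement)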
@ -443,8 +424,11 @@ impl ShowWithRRU for MemArg {
simm9.show_rru(mb_rru)
),
// Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Unexpected stack-offset mem-arg mode!")
&MemArg::SPOffset(..)
| &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..)
| &MemArg::RegOffset(..) => {
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
}
}
}
@ -485,18 +469,21 @@ impl ShowWithRRU for Cond {
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Block(block) => format!("block{}", block),
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
}
}
}
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
/// 64-bit variants of many instructions (and integer registers).
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
/// variants of vector instructions.
/// TODO: Create a separate type for SIMD & floating-point operands.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstSize {
Size32,
Size64,
Size128,
}
impl InstSize {
@ -519,11 +506,13 @@ impl InstSize {
/// Convert from a needed width to the smallest size that fits.
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
let bits: usize = bits.into();
assert!(bits <= 64);
assert!(bits <= 128);
if bits <= 32 {
InstSize::Size32
} else {
} else if bits <= 64 {
InstSize::Size64
} else {
InstSize::Size128
}
}
@ -532,11 +521,12 @@ impl InstSize {
Self::from_bits(ty_bits(ty))
}
/// Convert to I32 or I64.
/// Convert to I32, I64, or I128.
pub fn to_ty(self) -> Type {
match self {
InstSize::Size32 => I32,
InstSize::Size64 => I64,
InstSize::Size128 => I128,
}
}
@ -544,6 +534,9 @@ impl InstSize {
match self {
InstSize::Size32 => 0,
InstSize::Size64 => 1,
_ => {
panic!("Unexpected size");
}
}
}
}


@ -4,12 +4,13 @@ use crate::binemit::{CodeOffset, Reloc};
use crate::ir::constant::ConstantData;
use crate::ir::types::*;
use crate::ir::TrapCode;
use crate::isa::aarch64::{inst::regs::PINNED_REG, inst::*};
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
use regalloc::{Reg, RegClass, Writable};
use alloc::vec::Vec;
use core::convert::TryFrom;
use log::debug;
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
/// offset, possibly emitting relocation(s) as necessary.
@ -23,43 +24,67 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
pub fn mem_finalize(
insn_off: CodeOffset,
mem: &MemArg,
state: &EmitState,
) -> (SmallVec<[Inst; 4]>, MemArg) {
match mem {
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => {
&MemArg::RegOffset(_, off, ty)
| &MemArg::SPOffset(off, ty)
| &MemArg::FPOffset(off, ty)
| &MemArg::NominalSPOffset(off, ty) => {
let basereg = match mem {
&MemArg::SPOffset(..) => stack_reg(),
&MemArg::RegOffset(reg, _, _) => reg,
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
&MemArg::FPOffset(..) => fp_reg(),
_ => unreachable!(),
};
let adj = match mem {
&MemArg::NominalSPOffset(..) => {
debug!(
"mem_finalize: nominal SP offset {} + adj {} -> {}",
off,
state.virtual_sp_offset,
off + state.virtual_sp_offset
);
state.virtual_sp_offset
}
_ => 0,
};
let off = off + adj;
if let Some(simm9) = SImm9::maybe_from_i64(off) {
let mem = MemArg::Unscaled(basereg, simm9);
(vec![], mem)
(smallvec![], mem)
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
let mem = MemArg::UnsignedOffset(basereg, uimm12s);
(smallvec![], mem)
} else {
// In an addition, x31 is the zero register, not sp; we have only one temporary
// so we can't do the proper add here.
debug_assert_ne!(
basereg,
stack_reg(),
"should have diverted SP before mem_finalize"
);
let tmp = writable_spilltmp_reg();
let mut const_insts = Inst::load_constant(tmp, off as u64);
let add_inst = Inst::AluRRR {
// N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
// is a valid base (for SPOffset) which we must handle here.
// Also, SP needs to be the first arg, not second.
let add_inst = Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd: tmp,
rn: tmp.to_reg(),
rm: basereg,
rn: basereg,
rm: tmp.to_reg(),
extendop: ExtendOp::UXTX,
};
const_insts.push(add_inst);
(const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
(const_insts, MemArg::reg(tmp.to_reg()))
}
}
&MemArg::Label(ref label) => {
let off = memlabel_finalize(insn_off, label);
(vec![], MemArg::Label(MemLabel::PCRel(off)))
(smallvec![], MemArg::Label(MemLabel::PCRel(off)))
}
_ => (vec![], mem.clone()),
_ => (smallvec![], mem.clone()),
}
}
@ -73,12 +98,12 @@ pub fn u64_constant(bits: u64) -> ConstantData {
// Instructions and subcomponents: emission
fn machreg_to_gpr(m: Reg) -> u32 {
assert!(m.get_class() == RegClass::I64);
assert_eq!(m.get_class(), RegClass::I64);
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}
fn machreg_to_vec(m: Reg) -> u32 {
assert!(m.get_class() == RegClass::V128);
assert_eq!(m.get_class(), RegClass::V128);
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
}
@ -137,6 +162,14 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
match kind {
CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
}
}
const MOVE_WIDE_FIXED: u32 = 0x92800000;
#[repr(u32)]
@ -275,8 +308,8 @@ fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) ->
}
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert!(!is_16b); // to be supported later.
0b00001110_101_00000_00011_1_00000_00000
| ((is_16b as u32) << 30)
| machreg_to_vec(rd.to_reg())
| (machreg_to_vec(rn) << 16)
| (machreg_to_vec(rn) << 5)
@ -322,8 +355,29 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
fn emit(&self, sink: &mut O, flags: &settings::Flags) {
fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
}
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
virtual_sp_offset: i64,
}
impl MachInstEmit for Inst {
type State = EmitState;
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
// N.B.: we *must* not exceed the "worst-case size" used to compute
// where to insert islands, except when islands are explicitly triggered
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
// to allow disabling the check for `JTSequence`, which is always
// emitted following an `EmitIsland`.
let mut start_off = sink.cur_offset();
match self {
&Inst::AluRRR { alu_op, rd, rn, rm } => {
let top11 = match alu_op {
@ -596,10 +650,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
// ldst encoding helpers take Reg, not Writable<Reg>.
@ -608,17 +662,17 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
// This is the base opcode (top 10 bits) for the "unscaled
// immediate" form (Unscaled). Other addressing modes will OR in
// other values for bits 24/25 (bits 1/2 of this constant).
let op = match self {
&Inst::ULoad8 { .. } => 0b0011100001,
&Inst::SLoad8 { .. } => 0b0011100010,
&Inst::ULoad16 { .. } => 0b0111100001,
&Inst::SLoad16 { .. } => 0b0111100010,
&Inst::ULoad32 { .. } => 0b1011100001,
&Inst::SLoad32 { .. } => 0b1011100010,
&Inst::ULoad64 { .. } => 0b1111100001,
&Inst::FpuLoad32 { .. } => 0b1011110001,
&Inst::FpuLoad64 { .. } => 0b1111110001,
&Inst::FpuLoad128 { .. } => 0b0011110011,
let (op, bits) = match self {
&Inst::ULoad8 { .. } => (0b0011100001, 8),
&Inst::SLoad8 { .. } => (0b0011100010, 8),
&Inst::ULoad16 { .. } => (0b0111100001, 16),
&Inst::SLoad16 { .. } => (0b0111100010, 16),
&Inst::ULoad32 { .. } => (0b1011100001, 32),
&Inst::SLoad32 { .. } => (0b1011100010, 32),
&Inst::ULoad64 { .. } => (0b1111100001, 64),
&Inst::FpuLoad32 { .. } => (0b1011110001, 32),
&Inst::FpuLoad64 { .. } => (0b1111110001, 64),
&Inst::FpuLoad128 { .. } => (0b0011110011, 128),
_ => unreachable!(),
};
@ -632,6 +686,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
}
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
if uimm12scaled.value() != 0 {
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
}
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
}
&MemArg::RegReg(r1, r2) => {
@ -640,19 +697,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
));
}
&MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => {
match (ty, self) {
(I8, &Inst::ULoad8 { .. }) => {}
(I8, &Inst::SLoad8 { .. }) => {}
(I16, &Inst::ULoad16 { .. }) => {}
(I16, &Inst::SLoad16 { .. }) => {}
(I32, &Inst::ULoad32 { .. }) => {}
(I32, &Inst::SLoad32 { .. }) => {}
(I64, &Inst::ULoad64 { .. }) => {}
(F32, &Inst::FpuLoad32 { .. }) => {}
(F64, &Inst::FpuLoad64 { .. }) => {}
(I128, &Inst::FpuLoad128 { .. }) => {}
_ => panic!("Mismatching reg-scaling type in MemArg"),
}
assert_eq!(bits, ty_bits(ty));
let extendop = match &mem {
&MemArg::RegScaled(..) => None,
&MemArg::RegScaledExtended(_, _, _, op) => Some(op),
@ -697,9 +742,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
}
// Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Should not see stack-offset here!")
}
&MemArg::SPOffset(..)
| &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
&MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
}
}
@ -739,20 +785,20 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ref mem,
srcloc,
} => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags);
inst.emit(sink, flags, state);
}
let op = match self {
&Inst::Store8 { .. } => 0b0011100000,
&Inst::Store16 { .. } => 0b0111100000,
&Inst::Store32 { .. } => 0b1011100000,
&Inst::Store64 { .. } => 0b1111100000,
&Inst::FpuStore32 { .. } => 0b1011110000,
&Inst::FpuStore64 { .. } => 0b1111110000,
&Inst::FpuStore128 { .. } => 0b0011110010,
let (op, bits) = match self {
&Inst::Store8 { .. } => (0b0011100000, 8),
&Inst::Store16 { .. } => (0b0111100000, 16),
&Inst::Store32 { .. } => (0b1011100000, 32),
&Inst::Store64 { .. } => (0b1111100000, 64),
&Inst::FpuStore32 { .. } => (0b1011110000, 32),
&Inst::FpuStore64 { .. } => (0b1111110000, 64),
&Inst::FpuStore128 { .. } => (0b0011110010, 128),
_ => unreachable!(),
};
@ -766,6 +812,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
}
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
if uimm12scaled.value() != 0 {
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
}
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
}
&MemArg::RegReg(r1, r2) => {
@ -794,9 +843,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
}
// Eliminated by `mem_finalize()` above.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
panic!("Should not see stack-offset here!")
}
&MemArg::SPOffset(..)
| &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
&MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
}
}
@ -883,6 +933,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
&Inst::FpuMove64 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
}
&Inst::FpuMove128 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let top22 = match fpu_op {
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
@ -913,6 +966,44 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
};
sink.put4(enc_fpurrr(top22, rd, rn, rm));
}
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
FPUOpRI::UShr32(imm) => {
debug_assert_eq!(32, imm.lane_size_in_bits);
sink.put4(
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
| imm.enc() << 16
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg()),
)
}
FPUOpRI::UShr64(imm) => {
debug_assert_eq!(64, imm.lane_size_in_bits);
sink.put4(
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
| imm.enc() << 16
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg()),
)
}
FPUOpRI::Sli64(imm) => {
debug_assert_eq!(64, imm.lane_size_in_bits);
sink.put4(
0b01_1_111110_0000000_010101_00000_00000
| imm.enc() << 16
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg()),
)
}
FPUOpRI::Sli32(imm) => {
debug_assert_eq!(32, imm.lane_size_in_bits);
sink.put4(
0b0_0_1_011110_0000000_010101_00000_00000
| imm.enc() << 16
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg()),
)
}
},
&Inst::FpuRRRR {
fpu_op,
rd,
@ -926,6 +1017,15 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
};
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
}
&Inst::VecMisc { op, rd, rn, ty } => {
let bits_12_16 = match op {
VecMisc2::Not => {
debug_assert_eq!(I8X16, ty);
0b00101
}
};
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
}
&Inst::FpuCmp32 { rn, rm } => {
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
}
@ -980,11 +1080,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(8),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put4(const_data.to_bits());
}
&Inst::LoadFpuConst64 { rd, const_data } => {
@ -993,13 +1093,29 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put8(const_data.to_bits());
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let inst = Inst::FpuLoad128 {
rd,
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(20),
};
inst.emit(sink, flags, state);
for i in const_data.to_le_bytes().iter() {
sink.put1(*i);
}
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
}
@ -1033,12 +1149,40 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
| machreg_to_gpr(rd.to_reg()),
);
}
&Inst::VecRRR { rd, rn, rm, alu_op } => {
&Inst::VecRRR {
rd,
rn,
rm,
alu_op,
ty,
} => {
let enc_size_for_cmp = match ty {
I8X16 => 0b00,
_ => 0,
};
let (top11, bit15_10) = match alu_op {
VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011),
VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011),
VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011),
VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011),
VecALUOp::SQAddScalar => {
debug_assert_eq!(I64, ty);
(0b010_11110_11_1, 0b000011)
}
VecALUOp::SQSubScalar => {
debug_assert_eq!(I64, ty);
(0b010_11110_11_1, 0b001011)
}
VecALUOp::UQAddScalar => {
debug_assert_eq!(I64, ty);
(0b011_11110_11_1, 0b000011)
}
VecALUOp::UQSubScalar => {
debug_assert_eq!(I64, ty);
(0b011_11110_11_1, 0b001011)
}
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
};
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
}
@ -1084,7 +1228,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
if top22 != 0 {
sink.put4(enc_extend(top22, rd, rn));
} else {
Inst::mov32(rd, rn).emit(sink, flags);
Inst::mov32(rd, rn).emit(sink, flags, state);
}
}
&Inst::Extend {
@ -1107,7 +1251,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: zero_reg(),
rm: rd.to_reg(),
};
sub_inst.emit(sink, flags);
sub_inst.emit(sink, flags, state);
}
&Inst::Extend {
rd,
@ -1127,10 +1271,14 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
panic!("Unsupported extend variant");
}
&Inst::Jump { ref dest } => {
// TODO: differentiate between as_off26() returning `None` for
// out-of-range vs. not-yet-finalized. The latter happens when we
// do early (fake) emission for size computation.
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
let off = sink.cur_offset();
// If the jump target is a label, record its use so that a fixup can occur later.
if let Some(l) = dest.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch26);
sink.add_uncond_branch(off, off + 4, l);
}
// Emit the jump itself.
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
}
&Inst::Ret => {
sink.put4(0xd65f03c0);
@ -1138,71 +1286,47 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
&Inst::EpiloguePlaceholder => {
// Noop; this is just a placeholder for epilogues.
}
&Inst::Call {
ref dest,
loc,
opcode,
..
} => {
sink.add_reloc(loc, Reloc::Arm64Call, dest, 0);
&Inst::Call { ref info } => {
sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
sink.put4(enc_jump26(0b100101, 0));
if opcode.is_call() {
sink.add_call_site(loc, opcode);
if info.opcode.is_call() {
sink.add_call_site(info.loc, info.opcode);
}
}
&Inst::CallInd {
rn, loc, opcode, ..
} => {
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
if opcode.is_call() {
sink.add_call_site(loc, opcode);
&Inst::CallInd { ref info } => {
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
if info.opcode.is_call() {
sink.add_call_site(info.loc, info.opcode);
}
}
&Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"),
&Inst::CondBrLowered { target, kind } => match kind {
// TODO: handle >2^19 case by emitting a compound sequence with
// an unconditional (26-bit) branch. We need branch-relaxation
// adjustment machinery to enable this (because we don't want to
// always emit the long form).
CondBrKind::Zero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg));
}
CondBrKind::NotZero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg));
}
CondBrKind::Cond(c) => {
sink.put4(enc_cbr(
0b01010100,
target.as_off19().unwrap_or(0),
0b0,
c.bits(),
));
}
},
&Inst::CondBrLoweredCompound {
&Inst::CondBr {
taken,
not_taken,
kind,
} => {
// Conditional part first.
match kind {
CondBrKind::Zero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg));
}
CondBrKind::NotZero(reg) => {
sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg));
}
CondBrKind::Cond(c) => {
sink.put4(enc_cbr(
0b01010100,
taken.as_off19().unwrap_or(0),
0b0,
c.bits(),
));
}
let cond_off = sink.cur_offset();
if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
}
// Unconditional part.
sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0)));
sink.put4(enc_conditional_br(taken, kind));
// Unconditional part next.
let uncond_off = sink.cur_offset();
if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
}
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
}
&Inst::OneWayCondBr { target, kind } => {
let off = sink.cur_offset();
if let Some(l) = target.as_label() {
sink.use_label_at_offset(off, l, LabelUse::Branch19);
}
sink.put4(enc_conditional_br(target, kind));
}
&Inst::IndirectBr { rn, .. } => {
sink.put4(enc_br(rn));
@ -1219,8 +1343,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.add_trap(srcloc, code);
sink.put4(0xd4a00000);
}
&Inst::Adr { rd, ref label } => {
let off = memlabel_finalize(sink.cur_offset_from_start(), label);
&Inst::Adr { rd, off } => {
assert!(off > -(1 << 20));
assert!(off < (1 << 20));
sink.put4(enc_adr(off, rd));
@ -1235,26 +1358,20 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
ridx,
rtmp1,
rtmp2,
ref targets,
ref info,
..
} => {
// This sequence is *one* instruction in the vcode, and is expanded only here at
// emission time, because we cannot allow the regalloc to insert spills/reloads in
// the middle; we depend on hardcoded PC-rel addressing below.
//
// N.B.: if PC-rel addressing on ADR below is changed, also update
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
// Save index in a tmp (the live range of ridx only goes to start of this
// sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(rtmp2, ridx, I64);
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Load address of jump table
let inst = Inst::Adr {
rd: rtmp1,
label: MemLabel::PCRel(16),
};
inst.emit(sink, flags);
let inst = Inst::Adr { rd: rtmp1, off: 16 };
inst.emit(sink, flags, state);
// Load value out of jump table
let inst = Inst::SLoad32 {
rd: rtmp2,
@ -1266,7 +1383,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Add base of jump table to jump-table-sourced block offset
let inst = Inst::AluRRR {
alu_op: ALUOp::Add64,
@ -1274,22 +1391,30 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Branch to computed address. (`targets` here is only used for successor queries
// and is not needed for emission.)
let inst = Inst::IndirectBr {
rn: rtmp1.to_reg(),
targets: vec![],
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
// Emit jump table (table of 32-bit offsets).
for target in targets {
let off = target.as_offset_words() * 4;
let off = i32::try_from(off).unwrap();
// cast i32 to u32 (two's-complement)
let off = off as u32;
sink.put4(off);
let jt_off = sink.cur_offset();
for &target in info.targets.iter() {
let word_off = sink.cur_offset();
let off_into_table = word_off - jt_off;
sink.use_label_at_offset(
word_off,
target.as_label().unwrap(),
LabelUse::PCRel32,
);
sink.put4(off_into_table);
}
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
// disable the worst-case-size check in this case.
start_off = sink.cur_offset();
}
&Inst::LoadConst64 { rd, const_data } => {
let inst = Inst::ULoad64 {
@ -1297,11 +1422,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.put8(const_data);
}
&Inst::LoadExtName {
@ -1315,11 +1440,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
mem: MemArg::Label(MemLabel::PCRel(8)),
srcloc: None, // can't cause a user trap.
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, flags);
inst.emit(sink, flags, state);
sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
if flags.emit_all_ones_funcaddrs() {
sink.put8(u64::max_value());
@ -1327,53 +1452,82 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
sink.put8(0);
}
}
&Inst::LoadAddr { rd, ref mem } => match *mem {
MemArg::FPOffset(fp_off) => {
let alu_op = if fp_off < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
{
let inst = Inst::AluRRImm12 {
alu_op,
rd,
imm12,
rn: fp_reg(),
};
inst.emit(sink, flags);
} else {
let const_insts =
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
for inst in const_insts {
inst.emit(sink, flags);
}
let inst = Inst::AluRRR {
alu_op,
rd,
rn: fp_reg(),
rm: rd.to_reg(),
};
inst.emit(sink, flags);
}
&Inst::LoadAddr { rd, ref mem } => {
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
for inst in mem_insts.into_iter() {
inst.emit(sink, flags, state);
}
_ => unimplemented!("{:?}", mem),
},
&Inst::GetPinnedReg { rd } => {
let inst = Inst::Mov {
rd,
rm: xreg(PINNED_REG),
let (reg, offset) = match mem {
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
inst.emit(sink, flags);
let abs_offset = if offset < 0 {
-offset as u64
} else {
offset as u64
};
let alu_op = if offset < 0 {
ALUOp::Sub64
} else {
ALUOp::Add64
};
if offset == 0 {
let mov = Inst::mov(rd, reg);
mov.emit(sink, flags, state);
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
alu_op,
rd,
rn: reg,
imm12,
};
add.emit(sink, flags, state);
} else {
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
// that no other instructions will be inserted here (we're emitting directly),
// and a live range of `tmp2` should not span this instruction, so this use
// should otherwise be correct.
debug_assert!(rd.to_reg() != tmp2_reg());
debug_assert!(reg != tmp2_reg());
let tmp = writable_tmp2_reg();
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
insn.emit(sink, flags, state);
}
let add = Inst::AluRRR {
alu_op,
rd,
rn: reg,
rm: tmp.to_reg(),
};
add.emit(sink, flags, state);
}
}
&Inst::SetPinnedReg { rm } => {
let inst = Inst::Mov {
rd: Writable::from_reg(xreg(PINNED_REG)),
rm,
};
inst.emit(sink, flags);
&Inst::VirtualSPOffsetAdj { offset } => {
debug!(
"virtual sp offset adjusted by {} -> {}",
offset,
state.virtual_sp_offset + offset
);
state.virtual_sp_offset += offset;
}
&Inst::EmitIsland { needed_space } => {
if sink.island_needed(needed_space + 4) {
let jump_around_label = sink.get_label();
let jmp = Inst::Jump {
dest: BranchTarget::Label(jump_around_label),
};
jmp.emit(sink, flags, state);
sink.emit_island();
sink.bind_label(jump_around_label);
}
}
}
let end_off = sink.cur_offset();
debug_assert!((end_off - start_off) <= Inst::worst_case_size());
}
}
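The branch arms above follow a record-then-patch pattern: the instruction word is written with a zeroed offset field, and a (code offset, label, kind) use is recorded so the buffer can patch the field once the label is bound. The following self-contained toy sketch illustrates that idea for a 26-bit branch field; it is a simplified model with made-up names (ToyBuffer), not Cranelift's actual MachBuffer API.

use std::convert::TryInto;

// Toy record-then-patch buffer: branches are emitted with a zero offset and a
// recorded (code offset, label) use; binding the label later patches the low
// 26 bits of the branch word, much like enc_jump26 + use_label_at_offset above.
struct ToyBuffer {
    code: Vec<u8>,
    uses: Vec<(usize, usize)>,         // (branch word offset, label index)
    label_offsets: Vec<Option<usize>>, // label index -> bound code offset
}

impl ToyBuffer {
    fn new() -> Self {
        ToyBuffer { code: vec![], uses: vec![], label_offsets: vec![] }
    }
    fn get_label(&mut self) -> usize {
        self.label_offsets.push(None);
        self.label_offsets.len() - 1
    }
    fn put4(&mut self, word: u32) {
        self.code.extend_from_slice(&word.to_le_bytes());
    }
    fn use_label_at_offset(&mut self, off: usize, label: usize) {
        self.uses.push((off, label));
    }
    fn bind_label(&mut self, label: usize) {
        self.label_offsets[label] = Some(self.code.len());
    }
    fn finish(mut self) -> Vec<u8> {
        for &(off, label) in &self.uses {
            let target = self.label_offsets[label].expect("label must be bound");
            let delta_words = (((target as i64) - (off as i64)) / 4) as u32 & 0x03ff_ffff;
            let mut word = u32::from_le_bytes(self.code[off..off + 4].try_into().unwrap());
            word |= delta_words; // patch the 26-bit offset field
            self.code[off..off + 4].copy_from_slice(&word.to_le_bytes());
        }
        self.code
    }
}

fn main() {
    let mut buf = ToyBuffer::new();
    let label = buf.get_label();
    let off = buf.code.len();
    buf.use_label_at_offset(off, label); // "b <label>", offset patched at finish()
    buf.put4(0b000101 << 26);
    buf.put4(0xd503_201f); // nop
    buf.bind_label(label); // label resolves 8 bytes (2 words) past the branch
    let code = buf.finish();
    let branch = u32::from_le_bytes(code[0..4].try_into().unwrap());
    assert_eq!(branch & 0x03ff_ffff, 2);
}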

View file

@ -3,6 +3,7 @@ use crate::isa::aarch64::inst::*;
use crate::isa::test_utils;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
#[test]
@ -1310,38 +1311,68 @@ fn test_aarch64_binemit() {
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::FPOffset(32768),
mem: MemArg::FPOffset(32768, I8),
srcloc: None,
},
"0F0090D2EF011D8BE10140F9",
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]",
"100090D2B063308B010240F9",
"movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::FPOffset(-32768),
mem: MemArg::FPOffset(-32768, I8),
srcloc: None,
},
"EFFF8F92EF011D8BE10140F9",
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]",
"F0FF8F92B063308B010240F9",
"movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::FPOffset(1048576), // 2^20
mem: MemArg::FPOffset(1048576, I8), // 2^20
srcloc: None,
},
"0F02A0D2EF011D8BE10140F9",
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
"1002A0D2B063308B010240F9",
"movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1
srcloc: None,
},
"2F0080D20F02A0F2EF011D8BE10140F9",
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
"300080D21002A0F2B063308B010240F9",
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::RegOffset(xreg(7), 8, I64),
srcloc: None,
},
"E18040F8",
"ldur x1, [x7, #8]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::RegOffset(xreg(7), 1024, I64),
srcloc: None,
},
"E10042F9",
"ldr x1, [x7, #1024]",
));
insns.push((
Inst::ULoad64 {
rd: writable_xreg(1),
mem: MemArg::RegOffset(xreg(7), 1048576, I64),
srcloc: None,
},
"1002A0D2F060308B010240F9",
"movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
));
insns.push((
@ -1801,6 +1832,7 @@ fn test_aarch64_binemit() {
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::UQAddScalar,
ty: I64,
},
"D50EF77E",
"uqadd d21, d22, d23",
@ -1811,6 +1843,7 @@ fn test_aarch64_binemit() {
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::SQAddScalar,
ty: I64,
},
"D50EF75E",
"sqadd d21, d22, d23",
@ -1821,6 +1854,7 @@ fn test_aarch64_binemit() {
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::UQSubScalar,
ty: I64,
},
"D52EF77E",
"uqsub d21, d22, d23",
@ -1831,10 +1865,83 @@ fn test_aarch64_binemit() {
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::SQSubScalar,
ty: I64,
},
"D52EF75E",
"sqsub d21, d22, d23",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Cmeq,
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I8X16,
},
"E38E386E",
"cmeq v3.16b, v23.16b, v24.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Cmgt,
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I8X16,
},
"E336384E",
"cmgt v3.16b, v23.16b, v24.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Cmge,
rd: writable_vreg(23),
rn: vreg(9),
rm: vreg(12),
ty: I8X16,
},
"373D2C4E",
"cmge v23.16b, v9.16b, v12.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Cmhi,
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I8X16,
},
"2534216E",
"cmhi v5.16b, v1.16b, v1.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Cmhs,
rd: writable_vreg(8),
rn: vreg(2),
rm: vreg(15),
ty: I8X16,
},
"483C2F6E",
"cmhs v8.16b, v2.16b, v15.16b",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
rd: writable_vreg(2),
rn: vreg(1),
ty: I8X16,
},
"2258206E",
"mvn v2.16b, v1.16b",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),
@ -1955,7 +2062,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Zero(xreg(8)),
},
@ -1963,7 +2070,7 @@ fn test_aarch64_binemit() {
"cbz x8, 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::NotZero(xreg(8)),
},
@ -1971,7 +2078,7 @@ fn test_aarch64_binemit() {
"cbnz x8, 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Eq),
},
@ -1979,7 +2086,7 @@ fn test_aarch64_binemit() {
"b.eq 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ne),
},
@ -1988,7 +2095,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Hs),
},
@ -1996,7 +2103,7 @@ fn test_aarch64_binemit() {
"b.hs 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Lo),
},
@ -2004,7 +2111,7 @@ fn test_aarch64_binemit() {
"b.lo 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Mi),
},
@ -2012,7 +2119,7 @@ fn test_aarch64_binemit() {
"b.mi 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Pl),
},
@ -2020,7 +2127,7 @@ fn test_aarch64_binemit() {
"b.pl 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Vs),
},
@ -2028,7 +2135,7 @@ fn test_aarch64_binemit() {
"b.vs 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Vc),
},
@ -2036,7 +2143,7 @@ fn test_aarch64_binemit() {
"b.vc 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Hi),
},
@ -2044,7 +2151,7 @@ fn test_aarch64_binemit() {
"b.hi 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ls),
},
@ -2052,7 +2159,7 @@ fn test_aarch64_binemit() {
"b.ls 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Ge),
},
@ -2060,7 +2167,7 @@ fn test_aarch64_binemit() {
"b.ge 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Lt),
},
@ -2068,7 +2175,7 @@ fn test_aarch64_binemit() {
"b.lt 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Gt),
},
@ -2076,7 +2183,7 @@ fn test_aarch64_binemit() {
"b.gt 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Le),
},
@ -2084,7 +2191,7 @@ fn test_aarch64_binemit() {
"b.le 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Al),
},
@ -2092,7 +2199,7 @@ fn test_aarch64_binemit() {
"b.al 64",
));
insns.push((
Inst::CondBrLowered {
Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(64),
kind: CondBrKind::Cond(Cond::Nv),
},
@ -2101,7 +2208,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::CondBrLoweredCompound {
Inst::CondBr {
taken: BranchTarget::ResolvedOffset(64),
not_taken: BranchTarget::ResolvedOffset(128),
kind: CondBrKind::Cond(Cond::Le),
@ -2112,11 +2219,13 @@ fn test_aarch64_binemit() {
insns.push((
Inst::Call {
dest: ExternalName::testcase("test0"),
uses: Set::empty(),
defs: Set::empty(),
loc: SourceLoc::default(),
opcode: Opcode::Call,
info: Box::new(CallInfo {
dest: ExternalName::testcase("test0"),
uses: Vec::new(),
defs: Vec::new(),
loc: SourceLoc::default(),
opcode: Opcode::Call,
}),
},
"00000094",
"bl 0",
@ -2124,11 +2233,13 @@ fn test_aarch64_binemit() {
insns.push((
Inst::CallInd {
rn: xreg(10),
uses: Set::empty(),
defs: Set::empty(),
loc: SourceLoc::default(),
opcode: Opcode::CallIndirect,
info: Box::new(CallIndInfo {
rn: xreg(10),
uses: Vec::new(),
defs: Vec::new(),
loc: SourceLoc::default(),
opcode: Opcode::CallIndirect,
}),
},
"40013FD6",
"blr x10",
@ -2137,7 +2248,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::IndirectBr {
rn: xreg(3),
targets: vec![1, 2, 3],
targets: vec![],
},
"60001FD6",
"br x3",
@ -2148,7 +2259,7 @@ fn test_aarch64_binemit() {
insns.push((
Inst::Adr {
rd: writable_xreg(15),
label: MemLabel::PCRel((1 << 20) - 4),
off: (1 << 20) - 4,
},
"EFFF7F10",
"adr x15, pc+1048572",
@ -2163,6 +2274,15 @@ fn test_aarch64_binemit() {
"mov v8.8b, v4.8b",
));
insns.push((
Inst::FpuMove128 {
rd: writable_vreg(17),
rn: vreg(26),
},
"511FBA4E",
"mov v17.16b, v26.16b",
));
insns.push((
Inst::FpuRR {
fpu_op: FPUOp1::Abs32,
@ -2399,6 +2519,46 @@ fn test_aarch64_binemit() {
"fmadd d15, d30, d31, d1",
));
insns.push((
Inst::FpuRRI {
fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
rd: writable_vreg(2),
rn: vreg(5),
},
"A204202F",
"ushr v2.2s, v5.2s, #32",
));
insns.push((
Inst::FpuRRI {
fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
rd: writable_vreg(2),
rn: vreg(5),
},
"A204417F",
"ushr d2, d5, #63",
));
insns.push((
Inst::FpuRRI {
fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
rd: writable_vreg(4),
rn: vreg(10),
},
"44553F2F",
"sli v4.2s, v10.2s, #31",
));
insns.push((
Inst::FpuRRI {
fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
rd: writable_vreg(4),
rn: vreg(10),
},
"44557F7F",
"sli d4, d10, #63",
));
insns.push((
Inst::FpuToInt {
op: FpuToIntOp::F32ToU32,
@ -2685,6 +2845,15 @@ fn test_aarch64_binemit() {
"ldr d16, pc+8 ; b 12 ; data.f64 1",
));
insns.push((
Inst::LoadFpuConst128 {
rd: writable_vreg(5),
const_data: 0x0f0e0d0c0b0a09080706050403020100,
},
"4500009C05000014000102030405060708090A0B0C0D0E0F",
"ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
));
insns.push((
Inst::FpuCSel32 {
rd: writable_vreg(1),
@ -2791,19 +2960,11 @@ fn test_aarch64_binemit() {
let actual_printing = insn.show_rru(Some(&rru));
assert_eq!(expected_printing, actual_printing);
// Check the encoding is as expected.
let text_size = {
let mut code_sec = MachSectionSize::new(0);
insn.emit(&mut code_sec, &flags);
code_sec.size()
};
let mut sink = test_utils::TestCodeSink::new();
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, text_size);
let code_sec = sections.get_section(code_idx);
insn.emit(code_sec, &flags);
sections.emit(&mut sink);
let mut buffer = MachBuffer::new();
insn.emit(&mut buffer, &flags, &mut Default::default());
let buffer = buffer.finish();
buffer.emit(&mut sink);
let actual_encoding = &sink.stringify();
assert_eq!(expected_encoding, actual_encoding);
}

View file

@ -106,6 +106,85 @@ impl SImm7Scaled {
}
}
#[derive(Clone, Copy, Debug)]
pub struct FPULeftShiftImm {
pub amount: u8,
pub lane_size_in_bits: u8,
}
impl FPULeftShiftImm {
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
if amount < lane_size_in_bits {
Some(Self {
amount,
lane_size_in_bits,
})
} else {
None
}
}
pub fn enc(&self) -> u32 {
debug_assert!(self.lane_size_in_bits.is_power_of_two());
debug_assert!(self.lane_size_in_bits > self.amount);
// The encoding of the immediate follows the table below,
// where xs encode the shift amount.
//
// | lane_size_in_bits | encoding |
// +------------------------------+
// | 8 | 0001xxx |
// | 16 | 001xxxx |
// | 32 | 01xxxxx |
// | 64 | 1xxxxxx |
//
// The highest one bit is represented by `lane_size_in_bits`. Since
// `lane_size_in_bits` is a power of 2 and `amount` is less
// than `lane_size_in_bits`, they can be ORed
// together to produce the encoded value.
u32::from(self.lane_size_in_bits | self.amount)
}
}
#[derive(Clone, Copy, Debug)]
pub struct FPURightShiftImm {
pub amount: u8,
pub lane_size_in_bits: u8,
}
impl FPURightShiftImm {
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
if amount > 0 && amount <= lane_size_in_bits {
Some(Self {
amount,
lane_size_in_bits,
})
} else {
None
}
}
pub fn enc(&self) -> u32 {
debug_assert_ne!(0, self.amount);
// The encoding of the immediate follows the table below,
// where the xs encode the negated shift amount.
//
// | lane_size_in_bits | encoding |
// +------------------------------+
// | 8 | 0001xxx |
// | 16 | 001xxxx |
// | 32 | 01xxxxx |
// | 64 | 1xxxxxx |
//
// The shift amount is negated: a shift amount of 1 (in a
// 64-bit lane) is encoded as 0b111111 and a shift amount of
// 64 is encoded as 0b000000, in the bottom 6 bits.
u32::from((self.lane_size_in_bits * 2) - self.amount)
}
}
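The two encoding tables above reduce to simple bit arithmetic: the lane size supplies the leading marker bit, and the (possibly negated) shift amount fills the low bits. A standalone sketch of that arithmetic follows; the helper names are illustrative, not the crate's API, and the check values mirror the tests below.

// Left-shift immediate: lane_size (a power of two) is the leading one bit,
// and the shift amount fills the bits below it, so OR-ing them matches the
// "01xxxxx" / "1xxxxxx" rows of the table.
fn enc_fpu_left_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    assert!(lane_size_in_bits.is_power_of_two() && amount < lane_size_in_bits);
    u32::from(lane_size_in_bits | amount)
}

// Right-shift immediate: the amount is stored negated as 2 * lane_size - amount,
// so the bottom bits run from all-ones (shift by 1) down to all-zeros
// (shift by the full lane width).
fn enc_fpu_right_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    assert!(amount > 0 && amount <= lane_size_in_bits);
    u32::from(2 * lane_size_in_bits - amount)
}

fn main() {
    assert_eq!(enc_fpu_left_shift(32, 31), 0b0111111); // sli v4.2s, ..., #31
    assert_eq!(enc_fpu_left_shift(64, 63), 0b1111111); // sli d4, ..., #63
    assert_eq!(enc_fpu_right_shift(64, 1), 0b1111111);
    assert_eq!(enc_fpu_right_shift(64, 64), 0b1000000);
    assert_eq!(enc_fpu_right_shift(32, 32), 0b0100000); // ushr v2.2s, ..., #32
}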
/// a 9-bit signed offset.
#[derive(Clone, Copy, Debug)]
pub struct SImm9 {
@ -134,6 +213,11 @@ impl SImm9 {
pub fn bits(&self) -> u32 {
(self.value as u32) & 0x1ff
}
/// Signed value of immediate.
pub fn value(&self) -> i32 {
self.value as i32
}
}
/// An unsigned, scaled 12-bit offset.
@ -172,6 +256,16 @@ impl UImm12Scaled {
pub fn bits(&self) -> u32 {
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
}
/// Value after scaling.
pub fn value(&self) -> u32 {
self.value as u32
}
/// The value type which is the scaling base.
pub fn scale_ty(&self) -> Type {
self.scale_ty
}
}
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
@ -566,6 +660,18 @@ impl ShowWithRRU for SImm7Scaled {
}
}
impl ShowWithRRU for FPULeftShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}
impl ShowWithRRU for FPURightShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}
impl ShowWithRRU for SImm9 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)

File diff not shown because it is too large.

View file

@ -1,5 +1,6 @@
//! AArch64 ISA definitions: registers.
use crate::ir::types::*;
use crate::isa::aarch64::inst::InstSize;
use crate::machinst::*;
use crate::settings;
@ -20,23 +21,21 @@ pub const PINNED_REG: u8 = 21;
const XREG_INDICES: [u8; 31] = [
// X0 - X7
32, 33, 34, 35, 36, 37, 38, 39,
// X8 - X14
40, 41, 42, 43, 44, 45, 46,
// X15
59,
// X8 - X15
40, 41, 42, 43, 44, 45, 46, 47,
// X16, X17
47, 48,
58, 59,
// X18
60,
// X19, X20
49, 50,
48, 49,
// X21, put aside because it's the pinned register.
58,
57,
// X22 - X28
51, 52, 53, 54, 55, 56, 57,
// X29
50, 51, 52, 53, 54, 55, 56,
// X29 (FP)
61,
// X30
// X30 (LR)
62,
];
@ -125,14 +124,17 @@ pub fn writable_fp_reg() -> Writable<Reg> {
Writable::from_reg(fp_reg())
}
/// Get a reference to the "spill temp" register. This register is used to
/// compute the address of a spill slot when a direct offset addressing mode from
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
/// and reserve it for this purpose for simplicity; otherwise we need a
/// multi-stage analysis where we first determine how many spill slots we have,
/// then perhaps remove the reg from the pool and recompute regalloc.
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
///
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
/// to live through call instructions.
pub fn spilltmp_reg() -> Reg {
xreg(15)
xreg(16)
}
/// Get a writable reference to the spilltmp reg.
@ -140,6 +142,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Get a reference to the second temp register. We need this in some edge cases
/// where we need both the spilltmp and another temporary.
///
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
/// free to use otherwise.
pub fn tmp2_reg() -> Reg {
xreg(17)
}
/// Get a writable reference to the tmp2 reg.
pub fn writable_tmp2_reg() -> Writable<Reg> {
Writable::from_reg(tmp2_reg())
}
/// Create the register universe for AArch64.
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
let mut regs = vec![];
@ -173,7 +189,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
for i in 0u8..32u8 {
// See above for excluded registers.
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
continue;
}
let reg = Reg::new_real(
@ -191,7 +207,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: x_reg_base as usize,
last: x_reg_last as usize,
suggested_scratch: Some(XREG_INDICES[13] as usize),
suggested_scratch: Some(XREG_INDICES[19] as usize),
});
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: v_reg_base as usize,
@ -211,7 +227,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
regs.len()
};
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
regs.push((xreg(16).to_real_reg(), "x16".to_string()));
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
regs.push((link_reg().to_real_reg(), "lr".to_string()));
@ -259,13 +276,17 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
s
}
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
/// Show a vector register.
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
return s;
}
let prefix = if size.is32() { "s" } else { "d" };
let prefix = match size {
InstSize::Size32 => "s",
InstSize::Size64 => "d",
InstSize::Size128 => "q",
};
s.replace_range(0..1, prefix);
s
}
@ -291,3 +312,17 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
}
s
}
/// Show a vector register.
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
assert_eq!(RegClass::V128, reg.get_class());
let mut s = reg.show_rru(mb_rru);
match ty {
I8X16 => s.push_str(".16b"),
F32X2 => s.push_str(".2s"),
_ => unimplemented!(),
}
s
}
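For reference, the register-display conventions implemented by show_freg_sized and show_vreg_vector above can be summarized independently. This is a small illustrative sketch (helper names are made up, not the crate's API) whose outputs match strings in the emit tests, e.g. "q5" and "v17.16b".

// Scalar accesses to a V128 register print with a size prefix; vector
// accesses print with a lane-shape suffix. Illustrative only.
fn scalar_name(reg: u8, bits: u32) -> String {
    let prefix = match bits {
        32 => "s",
        64 => "d",
        128 => "q",
        _ => unreachable!(),
    };
    format!("{}{}", prefix, reg)
}

fn vector_name(reg: u8, lanes: u32, lane_bits: u32) -> String {
    let lane = match lane_bits {
        8 => "b",
        16 => "h",
        32 => "s",
        64 => "d",
        _ => unreachable!(),
    };
    format!("v{}.{}{}", reg, lanes, lane)
}

fn main() {
    assert_eq!(scalar_name(5, 128), "q5"); // LoadFpuConst128: "ldr q5, ..."
    assert_eq!(vector_name(17, 16, 8), "v17.16b"); // FpuMove128: "mov v17.16b, v26.16b"
    assert_eq!(vector_name(4, 2, 32), "v4.2s"); // FPUOpRI::Sli32: "sli v4.2s, ..."
}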

View file

@ -14,12 +14,14 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
use super::lower_inst;
use log::debug;
use regalloc::{Reg, RegClass, Writable};
//============================================================================
@ -104,18 +106,11 @@ pub(crate) enum ResultRegImmShift {
}
//============================================================================
// Instruction input and output "slots".
// Instruction input "slots".
//
// We use these types to refer to operand numbers, and result numbers, together
// with the associated instruction, in a type-safe way.
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InsnInput {
@ -123,95 +118,55 @@ pub(crate) struct InsnInput {
pub(crate) input: usize,
}
/// Producer of a value: either a previous instruction's output, or a register that will be
/// codegen'd separately.
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum InsnInputSource {
Output(InsnOutput),
Reg(Reg),
}
impl InsnInputSource {
fn as_output(self) -> Option<InsnOutput> {
match self {
InsnInputSource::Output(o) => Some(o),
_ => None,
}
}
}
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
assert!(num <= ctx.num_inputs(output.insn));
InsnInput {
insn: output.insn,
input: num,
}
}
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
/// register otherwise.
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
let out = InsnOutput {
insn: input_inst,
output: result_num,
};
InsnInputSource::Output(out)
} else {
let reg = ctx.input(input.insn, input.input);
InsnInputSource::Reg(reg)
}
pub(crate) struct InsnOutput {
pub(crate) insn: IRInst,
pub(crate) output: usize,
}
//============================================================================
// Lowering: convert instruction outputs to result types.
// Lowering: convert instruction inputs to forms that we can use.
/// Lower an instruction output to a 64-bit constant, if possible.
pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
let input = ctx.get_input(input.insn, input.input);
input.constant
}
/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
) -> Option<ShiftOpShiftImm> {
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}
pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<u128> {
if out.output > 0 {
None
} else {
let inst_data = ctx.data(out.insn);
if inst_data.opcode() == Opcode::Null {
Some(0)
} else {
match inst_data {
&InstructionData::UnaryImm { opcode: _, imm } => {
// Only has Into for i64; we use u64 elsewhere, so we cast.
let imm: i64 = imm.into();
Some(imm as u64)
}
&InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
_ => None,
match inst_data {
&InstructionData::UnaryConst {
opcode: _,
constant_handle,
} => {
let mut bytes = [0u8; 16];
let c = ctx.get_constant_data(constant_handle).clone().into_vec();
assert_eq!(c.len(), 16);
bytes.copy_from_slice(&c);
Some(u128::from_le_bytes(bytes))
}
_ => None,
}
}
}
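output_to_const_f128 reassembles the 16 bytes behind a constant handle into a u128 in little-endian order. A minimal standalone sketch of just that conversion (the helper name and sample bytes are made up for illustration):

// Reassemble a 16-byte little-endian constant into a u128, mirroring the
// copy_from_slice + u128::from_le_bytes step above.
fn const_f128_from_bytes(c: &[u8]) -> Option<u128> {
    if c.len() != 16 {
        return None;
    }
    let mut bytes = [0u8; 16];
    bytes.copy_from_slice(c);
    Some(u128::from_le_bytes(bytes))
}

fn main() {
    // Byte 0 is the least-significant byte, matching the
    // "data.f128 0x0f0e0d0c0b0a09080706050403020100" expectation in the emit tests.
    let data: Vec<u8> = (0u8..16).collect();
    assert_eq!(
        const_f128_from_bytes(&data),
        Some(0x0f0e0d0c0b0a09080706050403020100u128)
    );
}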
pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<f32> {
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
}
pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<f64> {
output_to_const(ctx, out).map(|value| f64::from_bits(value))
}
/// Lower an instruction output to a constant register-shift amount, if possible.
pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
ctx: &mut C,
out: InsnOutput,
) -> Option<ShiftOpShiftImm> {
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
}
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `input_to_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@ -237,9 +192,9 @@ impl NarrowValueMode {
}
}
/// Lower an instruction output to a reg.
/// Allocate a register for an instruction output and return it.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.output(out.insn, out.output)
ctx.get_output(out.insn, out.output)
}
/// Lower an instruction input to a reg.
@ -252,13 +207,31 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> Reg {
debug!("input_to_reg: input {:?}", input);
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty_bits(ty) as u8;
let in_reg = ctx.input(input.insn, input.input);
let inputs = ctx.get_input(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
let masked = if from_bits < 64 {
c & ((1u64 << from_bits) - 1)
} else {
c
};
// Generate constants fresh at each use to minimize long-range register pressure.
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() {
ctx.emit(inst);
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -269,7 +242,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -282,18 +255,23 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
let tmp = ctx.tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 64,
});
tmp.to_reg()
if inputs.constant.is_some() {
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 64,
});
tmp.to_reg()
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -304,6 +282,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(_, 64) => in_reg,
(_, 128) => in_reg,
_ => panic!(
"Unsupported input width: input ty {} bits {} mode {:?}",
@ -313,8 +292,6 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
}
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
@ -330,23 +307,18 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRS {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
let insn = out.insn;
assert!(out.output <= ctx.num_outputs(insn));
let inputs = ctx.get_input(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
if op == Opcode::Ishl {
let shiftee = get_input(ctx, out, 0);
let shift_amt = get_input(ctx, out, 1);
let shiftee = InsnInput { insn, input: 0 };
let shift_amt = InsnInput { insn, input: 1 };
// Can we get the shift amount as an immediate?
if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
let reg = input_to_reg(ctx, shiftee, narrow_mode);
ctx.merged(insn);
ctx.merged(shift_amt_out.insn);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
let reg = input_to_reg(ctx, shiftee, narrow_mode);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
}
}
@ -364,11 +336,10 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSE {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
let insn = out.insn;
assert!(out.output <= ctx.num_outputs(insn));
let inputs = ctx.get_input(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
let out_ty = ctx.output_ty(insn, out.output);
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
@ -378,7 +349,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
&& ((narrow_mode.is_32bit() && out_bits < 32)
|| (!narrow_mode.is_32bit() && out_bits < 64))
{
let reg = output_to_reg(ctx, out);
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
let extendop = match (narrow_mode, out_bits) {
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
ExtendOp::SXTB
@ -402,15 +373,14 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
return ResultRSE::RegExtend(reg.to_reg(), extendop);
return ResultRSE::RegExtend(reg, extendop);
}
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
if op == Opcode::Uextend || op == Opcode::Sextend {
assert!(out_bits == 32 || out_bits == 64);
let sign_extend = op == Opcode::Sextend;
let extendee = get_input(ctx, out, 0);
let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
let inner_ty = ctx.input_ty(insn, 0);
let inner_bits = ty_bits(inner_ty);
assert!(inner_bits < out_bits);
let extendop = match (sign_extend, inner_bits) {
@ -424,8 +394,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
(false, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
ctx.merged(insn);
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
return ResultRSE::RegExtend(reg, extendop);
}
}
@ -438,12 +407,9 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
ctx.merged(out.insn);
return ResultRSEImm12::Imm12(i);
}
if let Some(imm_value) = input_to_const(ctx, input) {
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
return ResultRSEImm12::Imm12(i);
}
}
@ -455,14 +421,11 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSImmLogic {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
let ty = ctx.output_ty(out.insn, out.output);
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
ctx.merged(out.insn);
return ResultRSImmLogic::ImmLogic(i);
}
if let Some(imm_value) = input_to_const(ctx, input) {
let ty = ctx.input_ty(input.insn, input.input);
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
return ResultRSImmLogic::ImmLogic(i);
}
}
@ -473,12 +436,9 @@ pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
) -> ResultRegImmShift {
if let InsnInputSource::Output(out) = input_source(ctx, input) {
if let Some(imm_value) = output_to_const(ctx, out) {
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
ctx.merged(out.insn);
return ResultRegImmShift::ImmShift(immshift);
}
if let Some(imm_value) = input_to_const(ctx, input) {
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
return ResultRegImmShift::ImmShift(immshift);
}
}
@ -584,12 +544,10 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
// mul instructions (Load/StoreComplex don't include scale factors).
// Handle one reg and offset that fits in immediate, if possible.
// Handle one reg and offset.
if addends.len() == 1 {
let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
return memarg;
}
return MemArg::RegOffset(reg, offset as i64, elem_ty);
}
// Handle two regs and a zero offset, if possible.
@ -600,7 +558,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}
// Otherwise, generate add instructions.
let addr = ctx.tmp(RegClass::I64, I64);
let addr = ctx.alloc_tmp(RegClass::I64, I64);
// Get the const into a reg.
lower_constant_u64(ctx, addr.clone(), offset as u64);
@ -612,7 +570,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// In an addition, the stack register is the zero register, so divert it to another
// register just before doing the actual add.
let reg = if reg == stack_reg() {
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
ctx.emit(Inst::Mov {
rd: tmp,
rm: stack_reg(),
@ -659,6 +617,14 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::load_fp_constant64(rd, value));
}
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
ctx: &mut C,
rd: Writable<Reg>,
value: u128,
) {
ctx.emit(Inst::load_fp_constant128(rd, value));
}
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
match cc {
IntCC::Equal => Cond::Eq,
@ -750,6 +716,7 @@ pub fn ty_bits(ty: Type) -> usize {
B64 | I64 | F64 => 64,
B128 | I128 => 128,
IFLAGS | FFLAGS => 32,
I8X16 | B8X16 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty),
}
}
@ -757,7 +724,7 @@ pub fn ty_bits(ty: Type) -> usize {
pub(crate) fn ty_is_int(ty: Type) -> bool {
match ty {
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
F32 | F64 | B128 | I128 => false,
F32 | F64 | B128 | I128 | I8X16 => false,
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
}
@ -823,24 +790,29 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
}
}
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
c: &mut C,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
if let InsnInputSource::Output(out) = input_source(c, input) {
let data = c.data(out.insn);
let inputs = c.get_input(input.insn, input.input);
debug!(
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
input, inputs, op
);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
debug!(" -> input inst {:?}", data);
if data.opcode() == op {
c.merged(out.insn);
return Some(out.insn);
return Some(src_inst);
}
}
None
}
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
@ -850,21 +822,19 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
op: Opcode,
conv: Opcode,
) -> Option<IRInst> {
if let Some(ret) = maybe_input_insn(c, input, op) {
return Some(ret);
}
if let InsnInputSource::Output(out) = input_source(c, input) {
let data = c.data(out.insn);
let inputs = c.get_input(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
if data.opcode() == conv {
let conv_insn = out.insn;
let conv_input = InsnInput {
insn: conv_insn,
input: 0,
};
if let Some(inner) = maybe_input_insn(c, conv_input, op) {
c.merged(conv_insn);
return Some(inner);
let inputs = c.get_input(src_inst, 0);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
}
}
}
@ -876,6 +846,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
insn: IRInst,
is_signed: bool,
) {
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let narrow_mode = match (bits <= 32, is_signed) {
@ -897,6 +868,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
let ty = ctx.input_ty(insn, 0);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rd = writable_zero_reg();
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
@ -934,17 +906,21 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
lower_inst::lower_insn_to_regs(ctx, ir_inst);
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {
Some(xreg(PINNED_REG))
}
}

View file

@ -1,17 +1,20 @@
//! Lower a single Cranelift instruction into vcode.
use crate::binemit::CodeOffset;
use crate::ir::condcodes::FloatCC;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::{CodegenError, CodegenResult};
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
use regalloc::RegClass;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryFrom;
use smallvec::SmallVec;
@ -19,7 +22,10 @@ use smallvec::SmallVec;
use super::lower::*;
/// Actually codegen an instruction's results into registers.
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
.map(|i| InsnInput { insn, input: i })
@ -35,17 +41,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let value = output_to_const(ctx, outputs[0]).unwrap();
let value = ctx.get_constant(insn).unwrap();
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_u64(ctx, rd, value);
}
Opcode::F32const => {
let value = output_to_const_f32(ctx, outputs[0]).unwrap();
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_f32(ctx, rd, value);
}
Opcode::F64const => {
let value = output_to_const_f64(ctx, outputs[0]).unwrap();
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_f64(ctx, rd, value);
}
@ -79,8 +85,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
VecALUOp::UQAddScalar
};
let va = ctx.tmp(RegClass::V128, I128);
let vb = ctx.tmp(RegClass::V128, I128);
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@ -91,6 +97,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn: va.to_reg(),
rm: vb.to_reg(),
alu_op,
ty: I64,
});
ctx.emit(Inst::MovFromVec64 {
rd,
@ -110,8 +117,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
VecALUOp::UQSubScalar
};
let va = ctx.tmp(RegClass::V128, I128);
let vb = ctx.tmp(RegClass::V128, I128);
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
@ -122,6 +129,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn: va.to_reg(),
rm: vb.to_reg(),
alu_op,
ty: I64,
});
ctx.emit(Inst::MovFromVec64 {
rd,
@ -271,7 +279,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
});
@ -297,7 +305,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 20;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::Zero(rm),
});
@ -324,7 +332,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
nzcv: NZCV::new(false, false, false, false),
cond: Cond::Eq,
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(12),
kind: CondBrKind::Cond(Cond::Vc),
});
@ -337,7 +345,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Check for divide by 0.
let branch_size = 8;
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(branch_size),
kind: CondBrKind::NotZero(rm),
});
@ -493,7 +501,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
let tmp = ctx.tmp(RegClass::I64, ty);
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
ctx.emit(Inst::AluRRR {
alu_op,
rd: tmp,
@ -516,7 +524,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Really ty_bits_size - rn, but the upper bits of the result are
// ignored (because of the implicit masking done by the instruction),
// so this is equivalent to negating the input.
let tmp = ctx.tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub32,
rd: tmp,
@ -529,7 +537,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
};
// Explicitly mask the rotation count.
let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmLogic {
alu_op: ALUOp::And32,
rd: tmp_masked_rm,
@ -538,8 +546,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
});
let tmp_masked_rm = tmp_masked_rm.to_reg();
let tmp1 = ctx.tmp(RegClass::I64, I32);
let tmp2 = ctx.tmp(RegClass::I64, I32);
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::Sub32,
rd: tmp1,
@ -578,7 +586,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
}
immshift.imm &= ty_bits_size - 1;
let tmp1 = ctx.tmp(RegClass::I64, I32);
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr32,
rd: tmp1,
@ -683,7 +691,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// and fix the sequence below to work properly for this.
let narrow_mode = NarrowValueMode::ZeroExtend64;
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
// the rest of the code is identical to the 64-bit version.
@ -870,6 +878,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
_ => panic!("Unsupported size in load"),
});
}
@ -909,6 +918,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
(32, true) => Inst::FpuStore32 { rd, mem, srcloc },
(64, false) => Inst::Store64 { rd, mem, srcloc },
(64, true) => Inst::FpuStore64 { rd, mem, srcloc },
(128, _) => Inst::FpuStore128 { rd, mem, srcloc },
_ => panic!("Unsupported size in store"),
});
}
@ -992,7 +1002,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
}
Opcode::Bitselect => {
let tmp = ctx.tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = output_to_reg(ctx, outputs[0]);
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@ -1145,12 +1155,66 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
(false, true) => NarrowValueMode::SignExtend64,
(false, false) => NarrowValueMode::ZeroExtend64,
};
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
ctx.emit(Inst::CondSet { cond, rd });
if ty_bits(ty) < 128 {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
ctx.emit(Inst::CondSet { cond, rd });
} else {
if ty != I8X16 {
return Err(CodegenError::Unsupported(format!(
"unsupported simd type: {:?}",
ty
)));
}
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
// 'Less than' operations are implemented by swapping
// the order of operands and using the 'greater than'
// instructions.
// 'Not equal' is implemented with 'equal' and inverting
// the result.
let (alu_op, swap) = match cond {
Cond::Eq => (VecALUOp::Cmeq, false),
Cond::Ne => (VecALUOp::Cmeq, false),
Cond::Ge => (VecALUOp::Cmge, false),
Cond::Gt => (VecALUOp::Cmgt, false),
Cond::Le => (VecALUOp::Cmge, true),
Cond::Lt => (VecALUOp::Cmgt, true),
Cond::Hs => (VecALUOp::Cmhs, false),
Cond::Hi => (VecALUOp::Cmhi, false),
Cond::Ls => (VecALUOp::Cmhs, true),
Cond::Lo => (VecALUOp::Cmhi, true),
_ => unreachable!(),
};
if swap {
std::mem::swap(&mut rn, &mut rm);
}
ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm,
ty,
});
if cond == Cond::Ne {
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
ty: I8X16,
});
}
}
}
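
A standalone sketch of the swap/invert scheme used above (plain Rust for illustration; `plan` and `cmp_lanes` are made-up names, not Cranelift API): a condition maps to a base comparison plus swap and invert flags, so "less than" becomes "greater than" with swapped operands and "not equal" becomes "equal" with the result inverted.

// Illustrative sketch only; not Cranelift code.
enum Cond { Eq, Ne, Gt, Lt }

// Map a condition to (use_greater_than, swap_operands, invert_result).
fn plan(cond: &Cond) -> (bool, bool, bool) {
    match cond {
        Cond::Eq => (false, false, false),
        Cond::Ne => (false, false, true),  // equal, then invert
        Cond::Gt => (true, false, false),
        Cond::Lt => (true, true, false),   // gt with swapped operands
    }
}

fn cmp_lanes(cond: &Cond, mut a: Vec<i8>, mut b: Vec<i8>) -> Vec<bool> {
    let (gt, swap, invert) = plan(cond);
    if swap {
        std::mem::swap(&mut a, &mut b);
    }
    a.iter()
        .zip(b.iter())
        .map(|(x, y)| (if gt { x > y } else { x == y }) ^ invert)
        .collect()
}

fn main() {
    // lt(a, b) is gt(b, a); ne(a, b) is !eq(a, b).
    assert_eq!(cmp_lanes(&Cond::Lt, vec![1, 5], vec![2, 3]), vec![true, false]);
    assert_eq!(cmp_lanes(&Cond::Ne, vec![1, 3], vec![1, 4]), vec![false, true]);
    assert_eq!(cmp_lanes(&Cond::Gt, vec![9, 0], vec![1, 1]), vec![true, false]);
    assert_eq!(cmp_lanes(&Cond::Eq, vec![7], vec![7]), vec![true]);
}
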
Opcode::Fcmp => {
@ -1188,7 +1252,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::Trapif | Opcode::Trapff => {
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
let cond = if op == Opcode::Trapif {
let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let cond = lower_condcode(condcode);
// The flags must not have been clobbered by any other
// instruction between the iadd_ifcout and this instruction, as
// verified by the CLIF validator; so we can simply use the
// flags here.
cond
} else if op == Opcode::Trapif {
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
@ -1211,7 +1283,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
// Branch around the break instruction with inverted cond. Go straight to lowered
// one-target form; this is logically part of a single-in single-out template lowering.
let cond = cond.invert();
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(cond),
});
@ -1233,11 +1305,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::FuncAddr => {
let rd = output_to_reg(ctx, outputs[0]);
let extname = ctx.call_target(insn).unwrap().clone();
let (extname, _) = ctx.call_target(insn).unwrap();
let extname = extname.clone();
let loc = ctx.srcloc(insn);
ctx.emit(Inst::LoadExtName {
rd,
name: extname,
name: Box::new(extname),
srcloc: loc,
offset: 0,
});
@ -1249,12 +1322,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::SymbolValue => {
let rd = output_to_reg(ctx, outputs[0]);
let (extname, offset) = ctx.symbol_value(insn).unwrap();
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
let extname = extname.clone();
let loc = ctx.srcloc(insn);
ctx.emit(Inst::LoadExtName {
rd,
name: extname,
name: Box::new(extname),
srcloc: loc,
offset,
});
@ -1262,54 +1335,50 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::Call | Opcode::CallIndirect => {
let loc = ctx.srcloc(insn);
let (abi, inputs) = match op {
let (mut abi, inputs) = match op {
Opcode::Call => {
let extname = ctx.call_target(insn).unwrap();
let (extname, dist) = ctx.call_target(insn).unwrap();
let extname = extname.clone();
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(AArch64ABICall::from_func(sig, &extname, loc), &inputs[..])
(
AArch64ABICall::from_func(sig, &extname, dist, loc)?,
&inputs[..],
)
}
Opcode::CallIndirect => {
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
}
_ => unreachable!(),
};
for inst in abi.gen_stack_pre_adjust().into_iter() {
ctx.emit(inst);
}
abi.emit_stack_pre_adjust(ctx);
assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
ctx.emit(inst);
}
}
for inst in abi.gen_call().into_iter() {
ctx.emit(inst);
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
let retval_reg = output_to_reg(ctx, *output);
ctx.emit(abi.gen_copy_retval_to_reg(i, retval_reg));
}
for inst in abi.gen_stack_post_adjust().into_iter() {
ctx.emit(inst);
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
}
abi.emit_stack_post_adjust(ctx);
}
Opcode::GetPinnedReg => {
let rd = output_to_reg(ctx, outputs[0]);
ctx.emit(Inst::GetPinnedReg { rd });
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
}
Opcode::SetPinnedReg => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::SetPinnedReg { rm });
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
}
Opcode::Spill
@ -1340,8 +1409,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
panic!("Branch opcode reached non-branch lowering logic!");
}
Opcode::Vconst
| Opcode::Shuffle
Opcode::Vconst => {
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
let rd = output_to_reg(ctx, outputs[0]);
lower_constant_f128(ctx, rd, value);
}
Opcode::RawBitcast => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rm, ty));
}
Opcode::Shuffle
| Opcode::Vsplit
| Opcode::Vconcat
| Opcode::Vselect
@ -1350,15 +1431,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
| Opcode::Splat
| Opcode::Insertlane
| Opcode::Extractlane
| Opcode::RawBitcast
| Opcode::ScalarToVector
| Opcode::Swizzle
| Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Sload32x2 => {
| Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => {
// TODO
panic!("Vector ops not implemented.");
}
@ -1452,54 +1538,38 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::Fcopysign => {
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
//
// (64 bits for example, 32-bit sequence is analogous):
// This is a scalar Fcopysign.
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
//
// MOV Xtmp1, Dinput0
// MOV Xtmp2, Dinput1
// AND Xtmp2, 0x8000_0000_0000_0000
// BIC Xtmp1, 0x8000_0000_0000_0000
// ORR Xtmp1, Xtmp1, Xtmp2
// MOV Doutput, Xtmp1
// mov vd, vn
// ushr vtmp, vm, #63 / #31
// sli vd, vtmp, #63 / #31
let ty = ctx.output_ty(insn, 0);
let bits = ty_bits(ty);
let bits = ty_bits(ty) as u8;
assert!(bits == 32 || bits == 64);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let tmp1 = ctx.tmp(RegClass::I64, I64);
let tmp2 = ctx.tmp(RegClass::I64, I64);
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
let imml = if bits == 32 {
ImmLogic::maybe_from_u64(0x8000_0000, I32).unwrap()
} else {
ImmLogic::maybe_from_u64(0x8000_0000_0000_0000, I64).unwrap()
};
let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
ctx.emit(Inst::AluRRImmLogic {
alu_op,
rd: tmp2,
rn: tmp2.to_reg(),
imml: imml.clone(),
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
// Copy LHS to rd.
ctx.emit(Inst::FpuMove64 { rd, rn });
// Copy the sign bit to the lowest bit in tmp.
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
rd: tmp,
rn: rm,
});
let alu_op = choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64);
ctx.emit(Inst::AluRRImmLogic {
alu_op,
rd: tmp1,
rn: tmp1.to_reg(),
imml,
});
let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
ctx.emit(Inst::AluRRR {
alu_op,
rd: tmp1,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
ctx.emit(Inst::MovToVec64 {
// Insert the bit from tmp into the sign bit of rd.
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
rd,
rn: tmp1.to_reg(),
rn: tmp.to_reg(),
});
}
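
A minimal scalar sketch of the same sign-transfer idea, using ordinary f64 bit manipulation rather than the NEON ushr/sli pair (illustration only; `copysign64` is a made-up helper, not the emitted sequence):

// Illustrative sketch only; not Cranelift code.
// Copy the sign bit of `sign_src` onto `magnitude`: shift the sign bit
// down ("ushr #63"), then insert it at bit 63 ("sli #63").
fn copysign64(magnitude: f64, sign_src: f64) -> f64 {
    let m = magnitude.to_bits();
    let s = sign_src.to_bits() >> 63;
    f64::from_bits((m & !(1u64 << 63)) | (s << 63))
}

fn main() {
    assert_eq!(copysign64(1.5, -0.0), -1.5);
    assert_eq!(copysign64(-2.0, 3.0), 2.0);
}
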
@ -1531,14 +1601,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
} else {
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
}
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
});
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
ctx.emit(Inst::Udf { trap_info });
let tmp = ctx.tmp(RegClass::V128, I128);
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
@ -1572,7 +1642,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
});
@ -1585,7 +1655,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
@ -1615,7 +1685,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
});
@ -1628,7 +1698,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
rn,
rm: tmp.to_reg(),
});
ctx.emit(Inst::CondBrLowered {
ctx.emit(Inst::OneWayCondBr {
target: BranchTarget::ResolvedOffset(8),
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
});
@ -1704,8 +1774,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
_ => unreachable!(),
};
let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
@ -1790,6 +1860,35 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
});
}
Opcode::IaddIfcout => {
// This is a two-output instruction that is needed for the
// legalizer's explicit heap-check sequence, among possible other
// uses. Its second output is a flags output only ever meant to
// check for overflow using the
// `backend.unsigned_add_overflow_condition()` condition.
//
// Note that the CLIF validation will ensure that no flag-setting
// operation comes between this IaddIfcout and its use (e.g., a
// Trapif). Thus, we can rely on implicit communication through the
// processor flags rather than explicitly generating flags into a
// register. We simply use the variant of the add instruction that
// sets flags (`adds`) here.
// Ensure that the second output isn't directly called for: it
// should only be used by a flags-consuming op, which will directly
// understand this instruction and merge the comparison.
assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg()));
// Now handle the iadd as above, except use an AddS opcode that sets
// flags.
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}
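
A scalar sketch of what the adds-plus-carry-check pattern computes, using plain Rust instead of processor flags (illustration only; `checked_heap_add` is a made-up name):

// Illustrative sketch only; not Cranelift code.
// `iadd_ifcout` lowered to `adds` produces the wrapped sum plus a carry
// flag; a following trap-on-carry then acts as an overflow check.
fn checked_heap_add(base: u64, offset: u64) -> Result<u64, &'static str> {
    let (sum, carry) = base.overflowing_add(offset);
    if carry {
        Err("trap: unsigned add overflowed")
    } else {
        Ok(sum)
    }
}

fn main() {
    assert_eq!(checked_heap_add(40, 2), Ok(42));
    assert!(checked_heap_add(u64::MAX, 1).is_err());
}
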
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm
@ -1800,7 +1899,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
| Opcode::IaddCin
| Opcode::IaddIfcin
| Opcode::IaddCout
| Opcode::IaddIfcout
| Opcode::IaddCarry
| Opcode::IaddIfcarry
| Opcode::IsubBin
@ -1849,6 +1947,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
| Opcode::X86Pmaxu
| Opcode::X86Pmins
| Opcode::X86Pminu
| Opcode::X86Pmullq
| Opcode::X86Pmuludq
| Opcode::X86Packss
| Opcode::X86Punpckh
| Opcode::X86Punpckl
@ -1860,14 +1960,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
Opcode::AvgRound => unimplemented!(),
Opcode::TlsValue => unimplemented!(),
}
Ok(())
}
pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx: &mut C,
branches: &[IRInst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
) {
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
@ -1881,18 +1983,14 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
//println!(
// "lowering two-branch group: opcodes are {:?} and {:?}",
// op0, op1
//);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Block(targets[0]);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Block(targets[1]),
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let flag_input = InsnInput {
@ -1952,6 +2050,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
Opcode::BrIcmp => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let is_signed = condcode_is_signed(condcode);
let ty = ctx.input_ty(branches[0], 0);
let bits = ty_bits(ty);
@ -1984,13 +2084,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
Opcode::Brif => {
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let is_signed = condcode_is_signed(condcode);
let flag_input = InsnInput {
insn: branches[0],
@ -2001,7 +2103,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
} else {
// If the ifcmp result is actually placed in a
@ -2011,7 +2113,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
}
@ -2019,6 +2121,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
Opcode::Brff => {
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
let cond = lower_fp_condcode(condcode);
let kind = CondBrKind::Cond(cond);
let flag_input = InsnInput {
insn: branches[0],
input: 0,
@ -2028,7 +2131,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
} else {
// If the ffcmp result is actually placed in a
@ -2038,7 +2141,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::CondBr {
taken,
not_taken,
kind: CondBrKind::Cond(cond),
kind,
});
}
}
@ -2055,12 +2158,15 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
// fills in `targets[0]` with our fallthrough block, so this
// is valid for both Jump and Fallthrough.
ctx.emit(Inst::Jump {
dest: BranchTarget::Block(targets[0]),
dest: BranchTarget::Label(targets[0]),
});
}
Opcode::BrTable => {
// Expand `br_table index, default, JT` to:
//
// emit_island // this forces an island at this point
// // if the jumptable would push us past
// // the deadline
// subs idx, #jt_size
// b.hs default
// adr vTmp1, PC+16
@ -2070,6 +2176,11 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
// [jumptable offsets relative to JT base]
let jt_size = targets.len() - 1;
assert!(jt_size <= std::u32::MAX as usize);
ctx.emit(Inst::EmitIsland {
needed_space: 4 * (6 + jt_size) as CodeOffset,
});
let ridx = input_to_reg(
ctx,
InsnInput {
@ -2079,8 +2190,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
NarrowValueMode::ZeroExtend32,
);
let rtmp1 = ctx.tmp(RegClass::I64, I32);
let rtmp2 = ctx.tmp(RegClass::I64, I32);
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
// Bounds-check and branch to default.
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
@ -2099,10 +2210,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
rm: rtmp1.to_reg(),
});
}
let default_target = BranchTarget::Block(targets[0]);
ctx.emit(Inst::CondBrLowered {
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
let default_target = BranchTarget::Label(targets[0]);
ctx.emit(Inst::OneWayCondBr {
target: default_target.clone(),
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
});
// Emit the compound instruction that does:
@ -2123,19 +2234,23 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
let jt_targets: Vec<BranchTarget> = targets
.iter()
.skip(1)
.map(|bix| BranchTarget::Block(*bix))
.map(|bix| BranchTarget::Label(*bix))
.collect();
let targets_for_term: Vec<BlockIndex> = targets.to_vec();
let targets_for_term: Vec<MachLabel> = targets.to_vec();
ctx.emit(Inst::JTSequence {
ridx,
rtmp1,
rtmp2,
targets: jt_targets,
targets_for_term,
info: Box::new(JTSequenceInfo {
targets: jt_targets,
targets_for_term: targets_for_term,
}),
});
}
_ => panic!("Unknown branch type!"),
}
}
Ok(())
}
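
The br_table expansion above boils down to a bounds check against the table size, a branch to the default target when out of range, and an indexed branch otherwise. A plain-Rust sketch of that control flow (illustrative; this `br_table` helper is made up, not the lowering itself):

// Illustrative sketch only; not Cranelift code.
// Dispatch `index` through a table of handlers, routing out-of-range
// indices to `default`, mirroring the `subs` / `b.hs default` pair.
fn br_table(index: usize, table: &[fn() -> &'static str], default: fn() -> &'static str) -> &'static str {
    if index >= table.len() {
        return default(); // "b.hs default" (unsigned >=)
    }
    table[index]() // indexed branch through the jump table
}

fn main() {
    fn t0() -> &'static str { "t0" }
    fn t1() -> &'static str { "t1" }
    fn fallback() -> &'static str { "default" }
    let table: &[fn() -> &'static str] = &[t0, t1];
    assert_eq!(br_table(1, table, fallback), "t1");
    assert_eq!(br_table(7, table, fallback), "default");
}
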


@ -1,5 +1,6 @@
//! ARM 64-bit Instruction Set Architecture.
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
@ -15,7 +16,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
mod abi;
mod inst;
pub(crate) mod inst;
mod lower;
mod lower_inst;
@ -25,12 +26,18 @@ use inst::create_reg_universe;
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
reg_universe: RealRegUniverse,
}
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
AArch64Backend { triple, flags }
let reg_universe = create_reg_universe(&flags);
AArch64Backend {
triple,
flags,
reg_universe,
}
}
/// This performs lowering to VCode, register-allocates the code, computes block layout and
@ -40,7 +47,7 @@ impl AArch64Backend {
func: &Function,
flags: settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> {
let abi = Box::new(abi::AArch64ABIBody::new(func, flags));
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
compile::compile::<AArch64Backend>(func, self, abi)
}
}
@ -53,7 +60,7 @@ impl MachBackend for AArch64Backend {
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let sections = vcode.emit();
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
@ -62,8 +69,10 @@ impl MachBackend for AArch64Backend {
None
};
let buffer = buffer.finish();
Ok(MachCompileResult {
sections,
buffer,
frame_size,
disasm,
})
@ -81,8 +90,21 @@ impl MachBackend for AArch64Backend {
&self.flags
}
fn reg_universe(&self) -> RealRegUniverse {
create_reg_universe(&self.flags)
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}
fn unsigned_add_overflow_condition(&self) -> IntCC {
// Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on
// overflow of an add.
IntCC::UnsignedGreaterThanOrEqual
}
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on
// underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the
// opposite of x86).
IntCC::UnsignedLessThan
}
}
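
A quick plain-Rust check of the conventions these two hooks describe (illustration only): unsigned add overflow is the carry-set case on aarch64, tested as HS (UnsignedGreaterThanOrEqual), while unsigned subtract borrow is the carry-cleared case, tested as LO (UnsignedLessThan).

// Illustrative sketch only; not Cranelift code.
fn main() {
    // Unsigned add: wrap-around is the "carry set" case on aarch64.
    let (_, add_overflowed) = u32::MAX.overflowing_add(1);
    assert!(add_overflowed);

    // Unsigned subtract: borrow is the "carry cleared" case
    // (carry-cleared-on-borrow convention).
    let (_, sub_borrowed) = 1u32.overflowing_sub(2);
    assert!(sub_borrowed);

    // No wrap, no borrow: the opposite conditions hold.
    assert!(!2u32.overflowing_add(3).1 && !5u32.overflowing_sub(3).1);
}
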
@ -134,8 +156,8 @@ mod test {
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
);
let sections = backend.compile_function(&mut func, false).unwrap().sections;
let code = &sections.sections[0].data;
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
@ -149,7 +171,7 @@ mod test {
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden);
assert_eq!(code, &golden[..]);
}
#[test]
@ -192,34 +214,32 @@ mod test {
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.sections.sections[0].data;
let code = &result.buffer.data[..];
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// mov x1, x0
// mov x0, #0x1234
// add w1, w1, w0
// mov w2, w1
// cbz x2, ...
// mov w2, w1
// cbz x2, ...
// sub w0, w1, w0
// mov x1, #0x1234 // #4660
// add w0, w0, w1
// mov w1, w0
// cbnz x1, 0x28
// mov x1, #0x1234 // #4660
// add w1, w0, w1
// mov w1, w1
// cbnz x1, 0x18
// mov w1, w0
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
// add w2, w1, w0
// mov w2, w2
// cbnz x2, ... <---- compound branch (cond / uncond)
// b ... <----
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
193, 168, 192, 3, 95, 214,
];
assert_eq!(code, &golden);
assert_eq!(code, &golden[..]);
}
}


@ -17,6 +17,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::any::Any;
use core::fmt;
use target_lexicon::{Architecture, Triple};
@ -135,6 +136,10 @@ impl TargetIsa for Isa {
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
}
fn as_any(&self) -> &dyn Any {
self as &dyn Any
}
}
impl fmt::Display for Isa {


@ -66,6 +66,7 @@ use crate::settings::SetResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::any::Any;
use core::fmt;
use core::fmt::{Debug, Formatter};
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
@ -77,11 +78,14 @@ mod riscv;
#[cfg(feature = "x86")]
mod x86;
#[cfg(feature = "x64")]
mod x64;
#[cfg(feature = "arm32")]
mod arm32;
#[cfg(feature = "arm64")]
mod aarch64;
pub(crate) mod aarch64;
#[cfg(feature = "unwind")]
pub mod unwind;
@ -419,6 +423,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
None
}
/// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait
/// with `isa.as_any().downcast_ref::<isa::foo::Isa>()`.
fn as_any(&self) -> &dyn Any;
}
impl Debug for &dyn TargetIsa {


@ -17,6 +17,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::any::Any;
use core::fmt;
use target_lexicon::{PointerWidth, Triple};
@ -130,6 +131,10 @@ impl TargetIsa for Isa {
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
unimplemented!()
}
fn as_any(&self) -> &dyn Any {
self as &dyn Any
}
}
#[cfg(test)]
@ -163,7 +168,7 @@ mod tests {
let arg32 = func.dfg.append_block_param(block, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
let inst64 = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
@ -176,7 +181,7 @@ mod tests {
);
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
let inst64_large = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
@ -186,7 +191,7 @@ mod tests {
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV64.
let inst32 = InstructionData::BinaryImm {
let inst32 = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),
@ -214,7 +219,7 @@ mod tests {
let arg32 = func.dfg.append_block_param(block, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
let inst64 = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
@ -224,7 +229,7 @@ mod tests {
assert!(isa.encode(&func, &inst64, types::I64).is_err());
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
let inst64_large = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
@ -234,7 +239,7 @@ mod tests {
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV32.
let inst32 = InstructionData::BinaryImm {
let inst32 = InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),


@ -8,7 +8,6 @@ use thiserror::Error;
use serde::{Deserialize, Serialize};
type Register = u16;
type Expression = Vec<u8>;
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
#[allow(missing_docs)]
@ -23,6 +22,8 @@ pub enum RegisterMappingError {
}
// This mirrors gimli's CallFrameInstruction, but is serializable
// This excludes CfaExpression, Expression, ValExpression due to
// https://github.com/gimli-rs/gimli/issues/513.
// TODO: if gimli ever adds serialization support, remove this type
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
@ -30,15 +31,12 @@ pub(crate) enum CallFrameInstruction {
Cfa(Register, i32),
CfaRegister(Register),
CfaOffset(i32),
CfaExpression(Expression),
Restore(Register),
Undefined(Register),
SameValue(Register),
Offset(Register, i32),
ValOffset(Register, i32),
Register(Register, Register),
Expression(Register, Expression),
ValExpression(Register, Expression),
RememberState,
RestoreState,
ArgsSize(u32),
@ -52,34 +50,33 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
CallFrameInstruction::CfaExpression(expr) => Self::CfaExpression(expr.0),
CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
CallFrameInstruction::Expression(reg, expr) => Self::Expression(reg.0, expr.0),
CallFrameInstruction::ValExpression(reg, expr) => Self::ValExpression(reg.0, expr.0),
CallFrameInstruction::RememberState => Self::RememberState,
CallFrameInstruction::RestoreState => Self::RestoreState,
CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
_ => {
// Cranelift's unwind support does not generate `CallFrameInstruction`s with
// Expression at this moment, and it is not trivial to
// serialize such instructions.
panic!("CallFrameInstruction with Expression not supported");
}
}
}
}
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
fn into(self) -> gimli::write::CallFrameInstruction {
use gimli::{
write::{CallFrameInstruction, Expression},
Register,
};
use gimli::{write::CallFrameInstruction, Register};
match self {
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
Self::CfaExpression(expr) => CallFrameInstruction::CfaExpression(Expression(expr)),
Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
@ -88,12 +85,6 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
Self::Register(reg1, reg2) => {
CallFrameInstruction::Register(Register(reg1), Register(reg2))
}
Self::Expression(reg, expr) => {
CallFrameInstruction::Expression(Register(reg), Expression(expr))
}
Self::ValExpression(reg, expr) => {
CallFrameInstruction::ValExpression(Register(reg), Expression(expr))
}
Self::RememberState => CallFrameInstruction::RememberState,
Self::RestoreState => CallFrameInstruction::RestoreState,
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),

third_party/rust/cranelift-codegen/src/isa/x64/abi.rs (new vendored file, 467 lines)

@ -0,0 +1,467 @@
//! Implementation of the standard x64 ABI.
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
use crate::isa::{self, x64::inst::*};
use crate::machinst::*;
use crate::settings;
use args::*;
#[derive(Clone, Debug)]
enum ABIArg {
Reg(RealReg),
_Stack,
}
#[derive(Clone, Debug)]
enum ABIRet {
Reg(RealReg),
_Stack,
}
pub(crate) struct X64ABIBody {
args: Vec<ABIArg>,
rets: Vec<ABIRet>,
/// Offsets to each stack slot.
_stack_slots: Vec<usize>,
/// Total stack size of all the stack slots.
stack_slots_size: usize,
/// Clobbered registers, as indicated by regalloc.
clobbered: Set<Writable<RealReg>>,
/// Total number of spill slots, as indicated by regalloc.
num_spill_slots: Option<usize>,
/// Calculated while creating the prologue, and used when creating the epilogue. Amount by
/// which RSP is adjusted downwards to allocate the spill area.
frame_size_bytes: Option<usize>,
call_conv: isa::CallConv,
/// The settings controlling this function's compilation.
flags: settings::Flags,
}
fn in_int_reg(ty: types::Type) -> bool {
match ty {
types::I8
| types::I16
| types::I32
| types::I64
| types::B1
| types::B8
| types::B16
| types::B32
| types::B64 => true,
_ => false,
}
}
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
_ => None,
}
}
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()),
_ => None,
}
}
fn is_callee_save_systemv(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
_ => unimplemented!(),
}
}
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
regs.into_iter()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect()
}
impl X64ABIBody {
/// Create a new body ABI instance.
pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
// Compute args and retvals from signature.
let mut args = vec![];
let mut next_int_arg = 0;
for param in &f.signature.params {
match param.purpose {
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
// `VMContext` is `r14` in Baldrdash.
args.push(ABIArg::Reg(regs::r14().to_real_reg()));
}
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
if in_int_reg(param.value_type) {
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
args.push(ABIArg::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing arg on the stack");
}
next_int_arg += 1;
} else {
unimplemented!("non int normal register")
}
}
_ => unimplemented!("other parameter purposes"),
}
}
let mut rets = vec![];
let mut next_int_retval = 0;
for ret in &f.signature.returns {
match ret.purpose {
ir::ArgumentPurpose::Normal => {
if in_int_reg(ret.value_type) {
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
rets.push(ABIRet::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing return on the stack");
}
next_int_retval += 1;
} else {
unimplemented!("returning non integer normal value");
}
}
_ => {
unimplemented!("non normal argument purpose");
}
}
}
// Compute stackslot locations and total stackslot size.
let mut stack_offset: usize = 0;
let mut _stack_slots = vec![];
for (stackslot, data) in f.stack_slots.iter() {
let off = stack_offset;
stack_offset += data.size as usize;
// 8-byte align.
stack_offset = (stack_offset + 7) & !7usize;

debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
_stack_slots.push(off);
}
Self {
args,
rets,
_stack_slots,
stack_slots_size: stack_offset,
clobbered: Set::empty(),
num_spill_slots: None,
frame_size_bytes: None,
call_conv: f.signature.call_conv.clone(),
flags,
}
}
}
impl ABIBody for X64ABIBody {
type I = Inst;
fn temp_needed(&self) -> bool {
false
}
fn init(&mut self, _: Option<Writable<Reg>>) {}
fn flags(&self) -> &settings::Flags {
&self.flags
}
fn num_args(&self) -> usize {
unimplemented!()
}
fn num_retvals(&self) -> usize {
unimplemented!()
}
fn num_stackslots(&self) -> usize {
unimplemented!()
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.args {
if let &ABIArg::Reg(r) = arg {
set.insert(r);
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.rets {
if let &ABIRet::Reg(r) = ret {
set.insert(r);
}
}
set
}
fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
match &self.args[idx] {
ABIArg::Reg(from_reg) => {
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
// TODO do we need a sign extension if it's I32?
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
}
unimplemented!("moving from non-int arg to vreg");
}
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
}
}
fn gen_retval_area_setup(&self) -> Option<Inst> {
None
}
fn gen_copy_reg_to_retval(
&self,
idx: usize,
from_reg: Writable<Reg>,
ext: ArgumentExtension,
) -> Vec<Inst> {
match ext {
ArgumentExtension::None => {}
_ => unimplemented!(
"unimplemented argument extension {:?} is required for baldrdash",
ext
),
};
let mut ret = Vec::new();
match &self.rets[idx] {
ABIRet::Reg(to_reg) => {
if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
ret.push(Inst::mov_r_r(
/*is64=*/ true,
from_reg.to_reg(),
Writable::<Reg>::from_reg(to_reg.to_reg()),
))
} else {
unimplemented!("moving from vreg to non-int return value");
}
}
ABIRet::_Stack => {
unimplemented!("moving from vreg to stack return value");
}
}
ret
}
fn gen_ret(&self) -> Inst {
Inst::ret()
}
fn gen_epilogue_placeholder(&self) -> Inst {
Inst::epilogue_placeholder()
}
fn set_num_spillslots(&mut self, slots: usize) {
self.num_spill_slots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
self.clobbered = clobbered;
}
fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
unimplemented!()
}
fn load_stackslot(
&self,
_slot: StackSlot,
_offset: u32,
_ty: Type,
_into_reg: Writable<Reg>,
) -> Inst {
unimplemented!("load_stackslot")
}
fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
unimplemented!("store_stackslot")
}
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
unimplemented!("load_spillslot")
}
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
unimplemented!("store_spillslot")
}
fn gen_prologue(&mut self) -> Vec<Inst> {
let r_rsp = regs::rsp();
let mut insts = vec![];
// Baldrdash generates its own prologue sequence, so we don't have to.
if !self.call_conv.extends_baldrdash() {
let r_rbp = regs::rbp();
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
// The "traditional" pre-preamble
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::push64(RMI::reg(r_rbp)));
// RSP is now 0 % 16
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
}
// Save callee saved registers that we trash. Keep track of how much space we've used, so
// as to know what we have to do to get the base of the spill area 0 % 16.
let mut callee_saved_used = 0;
let clobbered = get_callee_saves(self.clobbered.to_vec());
for reg in clobbered {
let r_reg = reg.to_reg();
match r_reg.get_class() {
RegClass::I64 => {
insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
callee_saved_used += 8;
}
_ => unimplemented!(),
}
}
let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
if self.call_conv.extends_baldrdash() {
// Baldrdash expects the stack to take at least the number of words set in
// baldrdash_prologue_words; count them here.
debug_assert!(
!self.flags.enable_probestack(),
"baldrdash does not expect cranelift to emit stack probes"
);
total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
}
debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
let frame_size = total_stacksize + callee_saved_used % 16;
// Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
let frame_size = (frame_size + 15) & !15;
if frame_size > 0x7FFF_FFFF {
unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
}
if !self.call_conv.extends_baldrdash() {
// Explicitly allocate the frame.
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
if frame_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
RMI_R_Op::Sub,
RMI::imm(frame_size as u32),
w_rsp,
));
}
}
// Stash this value. We'll need it for the epilogue.
debug_assert!(self.frame_size_bytes.is_none());
self.frame_size_bytes = Some(frame_size);
insts
}
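
The frame-size rounding in gen_prologue is the usual power-of-two align-up mask; a self-contained sketch (the `align_up` helper is illustrative, not part of this file):

// Illustrative sketch only; not Cranelift code.
// Round `size` up to the next multiple of `align`, as
// `(frame_size + 15) & !15` does above; works for any power of two.
fn align_up(size: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two());
    (size + align - 1) & !(align - 1)
}

fn main() {
    assert_eq!(align_up(0, 16), 0);
    assert_eq!(align_up(1, 16), 16);
    assert_eq!(align_up(24, 16), 32);
    // The stack-slot loop uses the same trick with 8-byte slots.
    assert_eq!(align_up(13, 8), 16);
}
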
fn gen_epilogue(&self) -> Vec<Inst> {
let mut insts = vec![];
// Undo what we did in the prologue.
// Clear the spill area and the 16-alignment padding below it.
if !self.call_conv.extends_baldrdash() {
let frame_size = self.frame_size_bytes.unwrap();
if frame_size > 0 {
let r_rsp = regs::rsp();
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
insts.push(Inst::alu_rmi_r(
true,
RMI_R_Op::Add,
RMI::imm(frame_size as u32),
w_rsp,
));
}
}
// Restore regs.
let clobbered = get_callee_saves(self.clobbered.to_vec());
for w_real_reg in clobbered.into_iter().rev() {
match w_real_reg.to_reg().get_class() {
RegClass::I64 => {
// TODO: make these conversion sequences less cumbersome.
insts.push(Inst::pop64(Writable::<Reg>::from_reg(
w_real_reg.to_reg().to_reg(),
)))
}
_ => unimplemented!(),
}
}
// Baldrdash generates its own preamble.
if !self.call_conv.extends_baldrdash() {
let r_rbp = regs::rbp();
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
// Undo the "traditional" pre-preamble
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::pop64(w_rbp));
insts.push(Inst::ret());
}
insts
}
fn frame_size(&self) -> u32 {
self.frame_size_bytes
.expect("frame size not computed before prologue generation") as u32
}
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
_ => panic!("Unexpected register class!"),
}
}
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
unimplemented!()
}
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
unimplemented!()
}
}

third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs (new vendored file, 420 lines)

@ -0,0 +1,420 @@
//! Instruction operand sub-components (aka "parts"): definitions and printing.
use std::fmt;
use std::string::{String, ToString};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
use crate::machinst::*;
use super::regs::show_ireg_sized;
/// A Memory Address. These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
/// Immediate sign-extended and a Register.
IR { simm32: u32, base: Reg },
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
IRRS {
simm32: u32,
base: Reg,
index: Reg,
shift: u8, /* 0 .. 3 only */
},
}
impl Addr {
// Constructors.
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
Self::IR { simm32, base }
}
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(index.get_class() == RegClass::I64);
debug_assert!(shift <= 3);
Addr::IRRS {
simm32,
base,
index,
shift,
}
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
Addr::IR { simm32: _, base } => {
collector.add_use(*base);
}
Addr::IRRS {
simm32: _,
base,
index,
shift: _,
} => {
collector.add_use(*base);
collector.add_use(*index);
}
}
}
}
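
A self-contained sketch of the effective address these two forms denote, i.e. sign-extended simm32 plus base plus index scaled by 1 << shift (plain arithmetic; `effective_addr` is a made-up helper, not emitter code):

// Illustrative sketch only; not Cranelift code.
fn effective_addr(simm32: u32, base: u64, index: u64, shift: u8) -> u64 {
    assert!(shift <= 3);
    let disp = simm32 as i32 as i64 as u64; // sign-extend 32 -> 64
    disp.wrapping_add(base).wrapping_add(index << shift)
}

fn main() {
    // -8(base, index, 4) with base = 0x1000 and index = 3 -> 0x1000 - 8 + 12.
    assert_eq!(effective_addr((-8i32) as u32, 0x1000, 3, 2), 0x1004);
}
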
impl ShowWithRRU for Addr {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
Addr::IRRS {
simm32,
base,
index,
shift,
} => format!(
"{}({},{},{})",
*simm32 as i32,
base.show_rru(mb_rru),
index.show_rru(mb_rru),
1 << shift
),
}
}
}
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
#[derive(Clone)]
pub(crate) enum RMI {
R { reg: Reg },
M { addr: Addr },
I { simm32: u32 },
}
impl RMI {
// Constructors
pub(crate) fn reg(reg: Reg) -> RMI {
debug_assert!(reg.get_class() == RegClass::I64);
RMI::R { reg }
}
pub(crate) fn mem(addr: Addr) -> RMI {
RMI::M { addr }
}
pub(crate) fn imm(simm32: u32) -> RMI {
RMI::I { simm32 }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RMI::R { reg } => collector.add_use(*reg),
RMI::M { addr } => addr.get_regs_as_uses(collector),
RMI::I { simm32: _ } => {}
}
}
}
impl ShowWithRRU for RMI {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RMI::M { addr } => addr.show_rru(mb_rru),
RMI::I { simm32 } => format!("${}", *simm32 as i32),
}
}
}
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value.
#[derive(Clone)]
pub(crate) enum RM {
R { reg: Reg },
M { addr: Addr },
}
impl RM {
// Constructors.
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64);
RM::R { reg }
}
pub(crate) fn mem(addr: Addr) -> Self {
RM::M { addr }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RM::R { reg } => collector.add_use(*reg),
RM::M { addr } => addr.get_regs_as_uses(collector),
}
}
}
impl ShowWithRRU for RM {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RM::M { addr } => addr.show_rru(mb_rru),
}
}
}
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
#[derive(Clone, PartialEq)]
pub enum RMI_R_Op {
Add,
Sub,
And,
Or,
Xor,
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
Mul,
}
impl RMI_R_Op {
pub(crate) fn to_string(&self) -> String {
match self {
RMI_R_Op::Add => "add".to_string(),
RMI_R_Op::Sub => "sub".to_string(),
RMI_R_Op::And => "and".to_string(),
RMI_R_Op::Or => "or".to_string(),
RMI_R_Op::Xor => "xor".to_string(),
RMI_R_Op::Mul => "imul".to_string(),
}
}
}
impl fmt::Debug for RMI_R_Op {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate ways of extending (widening) a value, using the Intel naming:
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
#[derive(Clone, PartialEq)]
pub enum ExtMode {
/// Byte -> Longword.
BL,
/// Byte -> Quadword.
BQ,
/// Word -> Longword.
WL,
/// Word -> Quadword.
WQ,
/// Longword -> Quadword.
LQ,
}
impl ExtMode {
pub(crate) fn to_string(&self) -> String {
match self {
ExtMode::BL => "bl".to_string(),
ExtMode::BQ => "bq".to_string(),
ExtMode::WL => "wl".to_string(),
ExtMode::WQ => "wq".to_string(),
ExtMode::LQ => "lq".to_string(),
}
}
pub(crate) fn dst_size(&self) -> u8 {
match self {
ExtMode::BL => 4,
ExtMode::BQ => 8,
ExtMode::WL => 4,
ExtMode::WQ => 8,
ExtMode::LQ => 8,
}
}
}
impl fmt::Debug for ExtMode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
#[derive(Clone)]
pub enum ShiftKind {
Left,
RightZ,
RightS,
}
impl ShiftKind {
pub(crate) fn to_string(&self) -> String {
match self {
ShiftKind::Left => "shl".to_string(),
ShiftKind::RightZ => "shr".to_string(),
ShiftKind::RightS => "sar".to_string(),
}
}
}
impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate condition code tests. Not all are represented since not all are useful in
/// compiler-generated code.
#[derive(Copy, Clone)]
#[repr(u8)]
pub enum CC {
/// overflow
O = 0,
/// no overflow
NO = 1,
/// < unsigned
B = 2,
/// >= unsigned
NB = 3,
/// zero
Z = 4,
/// not-zero
NZ = 5,
/// <= unsigned
BE = 6,
/// > unsigend
NBE = 7,
/// negative
S = 8,
/// not-negative
NS = 9,
/// < signed
L = 12,
/// >= signed
NL = 13,
/// <= signed
LE = 14,
/// > signed
NLE = 15,
}
impl CC {
pub(crate) fn to_string(&self) -> String {
match self {
CC::O => "o".to_string(),
CC::NO => "no".to_string(),
CC::B => "b".to_string(),
CC::NB => "nb".to_string(),
CC::Z => "z".to_string(),
CC::NZ => "nz".to_string(),
CC::BE => "be".to_string(),
CC::NBE => "nbe".to_string(),
CC::S => "s".to_string(),
CC::NS => "ns".to_string(),
CC::L => "l".to_string(),
CC::NL => "nl".to_string(),
CC::LE => "le".to_string(),
CC::NLE => "nle".to_string(),
}
}
pub(crate) fn invert(&self) -> CC {
match self {
CC::O => CC::NO,
CC::NO => CC::O,
CC::B => CC::NB,
CC::NB => CC::B,
CC::Z => CC::NZ,
CC::NZ => CC::Z,
CC::BE => CC::NBE,
CC::NBE => CC::BE,
CC::S => CC::NS,
CC::NS => CC::S,
CC::L => CC::NL,
CC::NL => CC::L,
CC::LE => CC::NLE,
CC::NLE => CC::LE,
}
}
pub(crate) fn get_enc(self) -> u8 {
self as u8
}
}
impl fmt::Debug for CC {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug)]
pub enum BranchTarget {
/// An unresolved reference to a MachLabel.
Label(MachLabel),
/// A resolved reference to another instruction, in bytes.
ResolvedOffset(isize),
}
impl ShowWithRRU for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
BranchTarget::Label(l) => format!("{:?}", l),
BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
}
}
}
impl BranchTarget {
/// Get the label.
pub fn as_label(&self) -> Option<MachLabel> {
match self {
&BranchTarget::Label(l) => Some(l),
_ => None,
}
}
/// Get the offset as a signed 32 bit byte offset. This returns the
/// offset in bytes between the first byte of the source and the first
/// byte of the target. It does not take into account the Intel-specific
/// rule that a branch offset is encoded as relative to the start of the
/// following instruction. That is a problem for the emitter to deal
/// with. If a label, returns zero.
pub fn as_offset32_or_zero(&self) -> i32 {
match self {
&BranchTarget::ResolvedOffset(off) => {
// Leave a bit of slack so that the emitter is guaranteed to
// be able to add the length of the jump instruction encoding
// to this value and still have a value in signed-32 range.
assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
off as i32
}
_ => 0,
}
}
}

third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs (new vendored file, 892 lines)

@ -0,0 +1,892 @@
use regalloc::{Reg, RegClass};
use crate::isa::x64::inst::*;
fn low8willSXto64(x: u32) -> bool {
let xs = (x as i32) as i64;
xs == ((xs << 56) >> 56)
}
fn low8willSXto32(x: u32) -> bool {
let xs = x as i32;
xs == ((xs << 24) >> 24)
}
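
For reference, these predicates accept exactly the 32-bit values whose signed interpretation fits in a sign-extended 8-bit immediate; a standalone illustration with a renamed copy (`fits_simm8` is not part of this file):

// Illustrative sketch only; not Cranelift code.
fn fits_simm8(x: u32) -> bool {
    let xs = x as i32 as i64;
    xs == ((xs << 56) >> 56)
}

fn main() {
    assert!(fits_simm8(0x7F));          //  127: fits
    assert!(!fits_simm8(0x80));         //  128: does not fit
    assert!(fits_simm8(0xFFFF_FF80));   // -128: fits after sign extension
    assert!(!fits_simm8(0xFFFF_FF7F));  // -129: does not fit
}
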
//=============================================================================
// Instructions and subcomponents: emission
// For all of the routines that take both a memory-or-reg operand (sometimes
// called "E" in the Intel documentation) and a reg-only operand ("G" in
// Intelese), the order is always G first, then E.
//
// "enc" in the following means "hardware register encoding number".
#[inline(always)]
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
debug_assert!(m0d < 4);
debug_assert!(encRegG < 8);
debug_assert!(rmE < 8);
((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
}
#[inline(always)]
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
debug_assert!(shift < 4);
debug_assert!(encIndex < 8);
debug_assert!(encBase < 8);
((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
}
/// Get the encoding number from something which we sincerely hope is a real
/// register of class I64.
#[inline(always)]
fn iregEnc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert!(reg.get_class() == RegClass::I64);
reg.get_hw_encoding()
}
// F_*: these flags describe special handling of the insn to be generated. Be
// careful with these. It is easy to create nonsensical combinations.
const F_NONE: u32 = 0;
/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
const F_RETAIN_REDUNDANT_REX: u32 = 1;
/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
/// indicating a 64-bit operation.
const F_CLEAR_REX_W: u32 = 2;
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
const F_PREFIX_66: u32 = 4;
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a register `encG` and a memory
/// address `memE`, create and emit, first the REX prefix, then caller-supplied
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
/// optionally, a SIB byte, and finally optionally an immediate that will be
/// derived from the `memE` operand. For most instructions up to and including
/// SSE4.2, that will be the whole instruction.
///
/// The opcodes are written bigendianly for the convenience of callers. For
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
/// By default, the REX prefix will indicate a 64-bit operation and will be
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
/// REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
memE: &Addr,
flags: u32,
) {
// General comment for this function: the registers in `memE` must be
// 64-bit integer registers, because they are part of an address
// expression. But `encG` can be derived from a register of any class.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override, if requested. This indicates a 16-bit
// operation.
if prefix66 {
sink.put1(0x66);
}
match memE {
Addr::IR { simm32, base: regE } => {
// First, cook up the REX byte. This is easy.
let encE = iregEnc(*regE);
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = 0;
let b = (encE >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& encE != regs::ENC_RSP
&& encE != regs::ENC_RBP
&& encE != regs::ENC_R12
&& encE != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(mkModRegRM(0, encG & 7, encE & 7));
} else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
sink.put1(mkModRegRM(0, encG & 7, 4));
sink.put1(0x24);
} else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
sink.put1(mkModRegRM(1, encG & 7, encE & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
sink.put1(mkModRegRM(2, encG & 7, encE & 7));
sink.put4(*simm32);
} else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
// REX.B distinguishes RSP from R12
sink.put1(mkModRegRM(1, encG & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
//.. wait for test case for RSP case
// REX.B distinguishes RSP from R12
sink.put1(mkModRegRM(2, encG & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
}
}
// Bizarrely, the IRRS case is much simpler.
Addr::IRRS {
simm32,
base: regBase,
index: regIndex,
shift,
} => {
let encBase = iregEnc(*regBase);
let encIndex = iregEnc(*regIndex);
// The rex byte
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = (encIndex >> 3) & 1;
let b = (encBase >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// All other prefixes and opcodes
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates
if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
sink.put1(mkModRegRM(1, encG & 7, 4));
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
sink.put1(*simm32 as u8);
} else if encIndex != regs::ENC_RSP {
sink.put1(mkModRegRM(2, encG & 7, 4));
sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
sink.put4(*simm32);
} else {
panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
}
}
}
}
/// This is the core 'emit' function for instructions that do not reference
/// memory.
///
/// This is conceptually the same as
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
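///
/// For example: `opcodes` == 0x89, `numOpcodes` == 1, `encG` == 1 (%rcx) and
/// `encE` == 2 (%rdx) with the default flags produce `movq %rcx, %rdx`, which
/// is the byte sequence 48 89 CA (REX.W, opcode, ModRM with mod=11, reg=001,
/// rm=010).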
fn emit_REX_OPCODES_MODRM_encG_encE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
encE: u8,
flags: u32,
) {
// EncG and EncE can be derived from registers of any class, and they
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override
if prefix66 {
sink.put1(0x66);
}
// The rex byte
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = 0;
let b = (encE >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// All other prefixes and opcodes
while numOpcodes > 0 {
numOpcodes -= 1;
sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm byte. The instruction we're generating doesn't access
// memory, so there is no SIB byte or immediate -- we're done.
sink.put1(mkModRegRM(3, encG & 7, encE & 7));
}
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
memE: &Addr,
flags: u32,
) {
// JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}
fn emit_REX_OPCODES_MODRM_regG_regE(
sink: &mut MachBuffer<Inst>,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
regE: Reg,
flags: u32,
) {
// JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
let encE = iregEnc(regE);
emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
}
/// Write a suitable number of bits from an imm64 to the sink.
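/// Note that a `size` of 8 still writes only four bytes: apart from `movabsq`
/// (which is emitted elsewhere), x86-64 instructions carry at most a 32-bit
/// immediate, which the processor sign-extends to 64 bits.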
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
1 => sink.put1(simm32 as u8),
_ => panic!("x64::Inst::emit_simm: unreachable"),
}
}
/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
/// instructions without also adding tests for those improved encodings. That
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
/// code.
///
/// For all instructions, make sure to have test coverage for all of the
/// following situations. Do this by creating the cross product resulting from
/// applying the following rules to each operand:
///
/// (1) for any insn that mentions a register: one test using a register from
/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
/// This helps detect incorrect REX prefix construction.
///
/// (2) for any insn that mentions a byte register: one test for each of the
/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
/// [r8b .. r11b] and [r12b .. r15b]. This checks that
/// apparently-redundant REX prefixes are retained when required.
///
/// (3) for any insn that contains an immediate field, check the following
/// cases: field is zero, field is in simm8 range (-128 .. 127), field is
/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
/// instructions that require a 32-bit immediate have a short-form encoding
/// when the imm is in simm8 range.
///
/// Rules (1), (2) and (3) don't apply for registers within address expressions
/// (`Addr`s). Those are already pretty well tested, and the registers in them
/// don't have any effect on the containing instruction (apart from possibly
/// requiring REX prefix bits).
///
/// When choosing registers for a test, avoid using registers with the same
/// offset within a given group. For example, don't use rax and r8, since they
/// both have the lowest 3 bits as 000, and so the test won't detect errors
/// where those 3-bit register sub-fields are confused by the emitter. Instead
/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
/// and bpl since they have the same offset in their group; use instead (eg) cl
/// and sil.
///
/// For all instructions, also add a test that uses only low-half registers
/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
/// prefixes are correctly omitted. This low-half restriction must apply to
/// _all_ registers in the insn, even those in address expressions.
///
/// Following these rules creates large numbers of test cases, but it's the
/// only way to make the emitter reliable.
///
/// Known possible improvements:
///
/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
/// care?)
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
match inst {
Inst::Nop { len: 0 } => {}
Inst::Alu_RMI_R {
is_64,
op,
src: srcE,
dst: regG,
} => {
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
if *op == RMI_R_Op::Mul {
// Mul was shoehorned into RMI_R_Op even though it doesn't fit the usual pattern, so
// we have to special-case it.
match srcE {
RMI::R { reg: regE } => {
emit_REX_OPCODES_MODRM_regG_regE(
sink,
0x0FAF,
2,
regG.to_reg(),
*regE,
flags,
);
}
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FAF,
2,
regG.to_reg(),
addr,
flags,
);
}
RMI::I { simm32 } => {
let useImm8 = low8willSXto32(*simm32);
let opcode = if useImm8 { 0x6B } else { 0x69 };
// Yes, really, regG twice.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
opcode,
1,
regG.to_reg(),
regG.to_reg(),
flags,
);
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
}
}
} else {
let (opcode_R, opcode_M, subopcode_I) = match op {
RMI_R_Op::Add => (0x01, 0x03, 0),
RMI_R_Op::Sub => (0x29, 0x2B, 5),
RMI_R_Op::And => (0x21, 0x23, 4),
RMI_R_Op::Or => (0x09, 0x0B, 1),
RMI_R_Op::Xor => (0x31, 0x33, 6),
RMI_R_Op::Mul => panic!("unreachable"),
};
match srcE {
RMI::R { reg: regE } => {
// Note. The arguments .. regE .. regG .. sequence
// here is the opposite of what is expected. I'm not
// sure why this is. But I am fairly sure that the
// arg order could be switched back to the expected
// .. regG .. regE .. if opcode_rr is also switched
// over to the "other" basic integer opcode (viz, the
// R/RM vs RM/R duality). However, that would mean
// that the test results won't be in accordance with
// the GNU as reference output. In other words, the
// inversion exists as a result of using GNU as as a
// gold standard.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
opcode_R,
1,
*regE,
regG.to_reg(),
flags,
);
// NB: if this is ever extended to handle byte size
// ops, be sure to retain redundant REX prefixes.
}
RMI::M { addr } => {
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
opcode_M,
1,
regG.to_reg(),
addr,
flags,
);
}
RMI::I { simm32 } => {
let useImm8 = low8willSXto32(*simm32);
let opcode = if useImm8 { 0x83 } else { 0x81 };
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(regG.to_reg());
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
}
}
}
}
Inst::Imm_R {
dst_is_64,
simm64,
dst,
} => {
let encDst = iregEnc(dst.to_reg());
if *dst_is_64 {
// FIXME JRS 2020Feb10: also use the 32-bit case here when
// possible
sink.put1(0x48 | ((encDst >> 3) & 1));
sink.put1(0xB8 | (encDst & 7));
sink.put8(*simm64);
} else {
if ((encDst >> 3) & 1) == 1 {
sink.put1(0x41);
}
sink.put1(0xB8 | (encDst & 7));
sink.put4(*simm64 as u32);
}
}
Inst::Mov_R_R { is_64, src, dst } => {
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
}
Inst::MovZX_M_R { extMode, addr, dst } => {
match extMode {
ExtMode::BL => {
// MOVZBL is (REX.W==0) 0F B6 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB6,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::BQ => {
// MOVZBQ is (REX.W==1) 0F B6 /r
// I'm not sure why the Intel manual offers different
// encodings for MOVZBQ than for MOVZBL. As I understand it,
// they should achieve the same result, since MOVZBL is going
// to zero out the upper half of the destination anyway.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB6,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::WL => {
// MOVZWL is (REX.W==0) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB7,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::WQ => {
// MOVZWQ is (REX.W==1) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FB7,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::LQ => {
// This is just a standard 32-bit load, and we rely on the
// default zero-extension rule to perform the extension.
// MOV r/m32, r32 is (REX.W==0) 8B /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x8B,
1,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
}
}
Inst::Mov64_M_R { addr, dst } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
}
Inst::MovSX_M_R { extMode, addr, dst } => {
match extMode {
ExtMode::BL => {
// MOVSBL is (REX.W==0) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBE,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::BQ => {
// MOVSBQ is (REX.W==1) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBE,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::WL => {
// MOVSWL is (REX.W==0) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBF,
2,
dst.to_reg(),
addr,
F_CLEAR_REX_W,
)
}
ExtMode::WQ => {
// MOVSWQ is (REX.W==1) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x0FBF,
2,
dst.to_reg(),
addr,
F_NONE,
)
}
ExtMode::LQ => {
// MOVSLQ is (REX.W==1) 63 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x63,
1,
dst.to_reg(),
addr,
F_NONE,
)
}
}
}
Inst::Mov_R_M { size, src, addr } => {
match size {
1 => {
// This is one of the few places where the presence of a
// redundant REX prefix changes the meaning of the
// instruction.
let encSrc = iregEnc(*src);
let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
F_RETAIN_REDUNDANT_REX
} else {
0
};
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x88,
1,
*src,
addr,
F_CLEAR_REX_W | retainRedundantRex,
)
}
2 => {
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x89,
1,
*src,
addr,
F_CLEAR_REX_W | F_PREFIX_66,
)
}
4 => {
// MOV r32, r/m32 is (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
0x89,
1,
*src,
addr,
F_CLEAR_REX_W,
)
}
8 => {
// MOV r64, r/m64 is (REX.W==1) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
}
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
}
}
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
} => {
let encDst = iregEnc(dst.to_reg());
let subopcode = match kind {
ShiftKind::Left => 4,
ShiftKind::RightZ => 5,
ShiftKind::RightS => 7,
};
match num_bits {
None => {
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xD3,
1,
subopcode,
encDst,
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
);
}
Some(num_bits) => {
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
// When the shift amount is 1, there's an even shorter encoding, but we don't
// bother with that nicety here.
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xC1,
1,
subopcode,
encDst,
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
);
sink.put1(*num_bits);
}
}
}
Inst::Cmp_RMI_R {
size,
src: srcE,
dst: regG,
} => {
let mut retainRedundantRex = 0;
if *size == 1 {
// Here, a redundant REX prefix changes the meaning of the
// instruction.
let encG = iregEnc(*regG);
if encG >= 4 && encG <= 7 {
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
}
}
let mut flags = match size {
8 => F_NONE,
4 => F_CLEAR_REX_W,
2 => F_CLEAR_REX_W | F_PREFIX_66,
1 => F_CLEAR_REX_W | retainRedundantRex,
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
};
match srcE {
RMI::R { reg: regE } => {
let opcode = if *size == 1 { 0x38 } else { 0x39 };
if *size == 1 {
// We also need to check whether the E register forces
// the use of a redundant REX.
let encE = iregEnc(*regE);
if encE >= 4 && encE <= 7 {
flags |= F_RETAIN_REDUNDANT_REX;
}
}
// Same comment re swapped args as for Alu_RMI_R.
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
}
RMI::M { addr } => {
let opcode = if *size == 1 { 0x3A } else { 0x3B };
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
}
RMI::I { simm32 } => {
// FIXME JRS 2020Feb11: there are shorter encodings for
// cmp $imm, rax/eax/ax/al.
let useImm8 = low8willSXto32(*simm32);
let opcode = if *size == 1 {
0x80
} else if useImm8 {
0x83
} else {
0x81
};
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(*regG);
emit_REX_OPCODES_MODRM_encG_encE(
sink, opcode, 1, 7, /*subopcode*/
encG, flags,
);
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
}
}
}
Inst::Push64 { src } => {
match src {
RMI::R { reg } => {
let encReg = iregEnc(*reg);
let rex = 0x40 | ((encReg >> 3) & 1);
if rex != 0x40 {
sink.put1(rex);
}
sink.put1(0x50 | (encReg & 7));
}
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
6, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
RMI::I { simm32 } => {
if low8willSXto64(*simm32) {
sink.put1(0x6A);
sink.put1(*simm32 as u8);
} else {
sink.put1(0x68);
sink.put4(*simm32);
}
}
}
}
Inst::Pop64 { dst } => {
let encDst = iregEnc(dst.to_reg());
if encDst >= 8 {
// 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
// here.
sink.put1(0x41);
}
sink.put1(0x58 + (encDst & 7));
}
//
// ** Inst::CallKnown
//
Inst::CallUnknown { dest } => {
match dest {
RM::R { reg } => {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xFF,
1,
2, /*subopcode*/
regEnc,
F_CLEAR_REX_W,
);
}
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
2, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
}
}
Inst::Ret {} => sink.put1(0xC3),
Inst::JmpKnown { dest } => {
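// This instruction (E9 + rel32) is 5 bytes long. As in the JmpCondSymm case
// below, the target offset is relative to the start of this instruction, but
// the encoding requires it to be relative to the start of the next one; hence
// the -5 adjustment.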
let disp = dest.as_offset32_or_zero() - 5;
let disp = disp as u32;
let br_start = sink.cur_offset();
let br_disp_off = br_start + 1;
let br_end = br_start + 5;
if let Some(l) = dest.as_label() {
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(br_start, br_end, l);
}
sink.put1(0xE9);
sink.put4(disp);
}
Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => {
// Conditional part.
// This insn is 6 bytes long. Currently `offset` is relative to
// the start of this insn, but the Intel encoding requires it to
// be relative to the start of the next instruction. Hence the
// adjustment.
let taken_disp = taken.as_offset32_or_zero() - 6;
let taken_disp = taken_disp as u32;
let cond_start = sink.cur_offset();
let cond_disp_off = cond_start + 2;
let cond_end = cond_start + 6;
if let Some(l) = taken.as_label() {
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
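// The inverted form of this conditional branch, handed to the MachBuffer so
// that it can flip the branch during branch optimization. Its last four bytes
// (FA FF FF FF) are the little-endian placeholder displacement -6, matching
// the placeholder used by the non-inverted emission below.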
let inverted: [u8; 6] =
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
}
sink.put1(0x0F);
sink.put1(0x80 + cc.get_enc());
sink.put4(taken_disp);
// Unconditional part.
let nt_disp = not_taken.as_offset32_or_zero() - 5;
let nt_disp = nt_disp as u32;
let uncond_start = sink.cur_offset();
let uncond_disp_off = uncond_start + 1;
let uncond_end = uncond_start + 5;
if let Some(l) = not_taken.as_label() {
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
sink.add_uncond_branch(uncond_start, uncond_end, l);
}
sink.put1(0xE9);
sink.put4(nt_disp);
}
Inst::JmpUnknown { target } => {
match target {
RM::R { reg } => {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
0xFF,
1,
4, /*subopcode*/
regEnc,
F_CLEAR_REX_W,
);
}
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
0xFF,
1,
4, /*subopcode*/
addr,
F_CLEAR_REX_W,
);
}
}
}
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
}
}

2191
third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs vendored Normal file

Diff not shown because of its large size.

905
third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs vendored Normal file

@@ -0,0 +1,905 @@
//! This module defines x86_64-specific machine instruction types.
#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
use core::convert::TryFrom;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};
use regalloc::RegUsageCollector;
use regalloc::Set;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};
use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{settings, CodegenError, CodegenResult};
pub mod args;
mod emit;
#[cfg(test)]
mod emit_tests;
pub mod regs;
use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};
//=============================================================================
// Instructions (top level): definition
// Don't build these directly. Instead use the Inst:: functions to create them.
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
#[derive(Clone)]
pub(crate) enum Inst {
/// nops of various sizes, including zero
Nop { len: u8 },
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
Alu_RMI_R {
is_64: bool,
op: RMI_R_Op,
src: RMI,
dst: Writable<Reg>,
},
/// (imm32 imm64) reg.
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64
Imm_R {
dst_is_64: bool,
simm64: u64,
dst: Writable<Reg>,
},
/// mov (64 32) reg reg
Mov_R_R {
is_64: bool,
src: Reg,
dst: Writable<Reg>,
},
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
/// Note that the lq variant doesn't really exist since the default
/// zero-extend rule makes it unnecessary. For that case we emit the
/// equivalent "movl AM, reg32".
MovZX_M_R {
extMode: ExtMode,
addr: Addr,
dst: Writable<Reg>,
},
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
MovSX_M_R {
extMode: ExtMode,
addr: Addr,
dst: Writable<Reg>,
},
/// mov (b w l q) reg addr (good for all integer stores)
Mov_R_M {
size: u8, // 1, 2, 4 or 8
src: Reg,
addr: Addr,
},
/// (shl shr sar) (l q) imm reg
Shift_R {
is_64: bool,
kind: ShiftKind,
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
num_bits: Option<u8>,
dst: Writable<Reg>,
},
/// cmp (b w l q) (reg addr imm) reg
Cmp_RMI_R {
size: u8, // 1, 2, 4 or 8
src: RMI,
dst: Reg,
},
/// pushq (reg addr imm)
Push64 { src: RMI },
/// popq reg
Pop64 { dst: Writable<Reg> },
/// call simm32
CallKnown {
dest: ExternalName,
uses: Set<Reg>,
defs: Set<Writable<Reg>>,
},
/// callq (reg mem)
CallUnknown {
dest: RM,
//uses: Set<Reg>,
//defs: Set<Writable<Reg>>,
},
// ---- branches (exactly one must appear at end of BB) ----
/// ret
Ret,
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
/// inserted there.
EpiloguePlaceholder,
/// jmp simm32
JmpKnown { dest: BranchTarget },
/// jcond cond target target
/// Symmetrical two-way conditional branch.
/// Emitted as a compound sequence; the MachBuffer will shrink it
/// as appropriate.
JmpCondSymm {
cc: CC,
taken: BranchTarget,
not_taken: BranchTarget,
},
/// jmpq (reg mem)
JmpUnknown { target: RM },
}
// Handy constructors for Insts.
// For various sizes: does sign-extending the lowest N bits of a value
// reproduce the whole value?
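// For example, low32willSXto64(0xFFFF_FFFF_8000_0000) is true (the low 32 bits
// sign-extend back to the whole value), whereas low32willSXto64(0x1_0000_0000)
// is false.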
pub(crate) fn low32willSXto64(x: u64) -> bool {
let xs = x as i64;
xs == ((xs << 32) >> 32)
}
impl Inst {
pub(crate) fn nop(len: u8) -> Self {
debug_assert!(len <= 16);
Self::Nop { len }
}
pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Self::Alu_RMI_R {
is_64,
op,
src,
dst,
}
}
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
if !dst_is_64 {
debug_assert!(low32willSXto64(simm64));
}
Inst::Imm_R {
dst_is_64,
simm64,
dst,
}
}
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
debug_assert!(src.get_class() == RegClass::I64);
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Mov_R_R { is_64, src, dst }
}
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovZX_M_R { extMode, addr, dst }
}
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Mov64_M_R { addr, dst }
}
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovSX_M_R { extMode, addr, dst }
}
pub(crate) fn mov_r_m(
size: u8, // 1, 2, 4 or 8
src: Reg,
addr: Addr,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(src.get_class() == RegClass::I64);
Inst::Mov_R_M { size, src, addr }
}
pub(crate) fn shift_r(
is_64: bool,
kind: ShiftKind,
num_bits: Option<u8>,
dst: Writable<Reg>,
) -> Inst {
debug_assert!(if let Some(num_bits) = num_bits {
num_bits < if is_64 { 64 } else { 32 }
} else {
true
});
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
}
}
pub(crate) fn cmp_rmi_r(
size: u8, // 1, 2, 4 or 8
src: RMI,
dst: Reg,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(dst.get_class() == RegClass::I64);
Inst::Cmp_RMI_R { size, src, dst }
}
pub(crate) fn push64(src: RMI) -> Inst {
Inst::Push64 { src }
}
pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
Inst::Pop64 { dst }
}
pub(crate) fn call_unknown(dest: RM) -> Inst {
Inst::CallUnknown { dest }
}
pub(crate) fn ret() -> Inst {
Inst::Ret
}
pub(crate) fn epilogue_placeholder() -> Inst {
Inst::EpiloguePlaceholder
}
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
Inst::JmpKnown { dest }
}
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
Inst::JmpCondSymm {
cc,
taken,
not_taken,
}
}
pub(crate) fn jmp_unknown(target: RM) -> Inst {
Inst::JmpUnknown { target }
}
}
//=============================================================================
// Instructions: printing
impl ShowWithRRU for Inst {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
fn ljustify(s: String) -> String {
let w = 7;
if s.len() >= w {
s
} else {
let need = usize::min(w, w - s.len());
s + &format!("{nil: <width$}", nil = "", width = need)
}
}
fn ljustify2(s1: String, s2: String) -> String {
ljustify(s1 + &s2)
}
fn suffixLQ(is_64: bool) -> String {
(if is_64 { "q" } else { "l" }).to_string()
}
fn sizeLQ(is_64: bool) -> u8 {
if is_64 {
8
} else {
4
}
}
fn suffixBWLQ(size: u8) -> String {
match size {
1 => "b".to_string(),
2 => "w".to_string(),
4 => "l".to_string(),
8 => "q".to_string(),
_ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
}
}
match self {
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
Inst::Alu_RMI_R {
is_64,
op,
src,
dst,
} => format!(
"{} {}, {}",
ljustify2(op.to_string(), suffixLQ(*is_64)),
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
),
Inst::Imm_R {
dst_is_64,
simm64,
dst,
} => {
if *dst_is_64 {
format!(
"{} ${}, {}",
ljustify("movabsq".to_string()),
*simm64 as i64,
show_ireg_sized(dst.to_reg(), mb_rru, 8)
)
} else {
format!(
"{} ${}, {}",
ljustify("movl".to_string()),
(*simm64 as u32) as i32,
show_ireg_sized(dst.to_reg(), mb_rru, 4)
)
}
}
Inst::Mov_R_R { is_64, src, dst } => format!(
"{} {}, {}",
ljustify2("mov".to_string(), suffixLQ(*is_64)),
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Inst::MovZX_M_R { extMode, addr, dst } => {
if *extMode == ExtMode::LQ {
format!(
"{} {}, {}",
ljustify("movl".to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, 4)
)
} else {
format!(
"{} {}, {}",
ljustify2("movz".to_string(), extMode.to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
)
}
}
Inst::Mov64_M_R { addr, dst } => format!(
"{} {}, {}",
ljustify("movq".to_string()),
addr.show_rru(mb_rru),
dst.show_rru(mb_rru)
),
Inst::MovSX_M_R { extMode, addr, dst } => format!(
"{} {}, {}",
ljustify2("movs".to_string(), extMode.to_string()),
addr.show_rru(mb_rru),
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
),
Inst::Mov_R_M { size, src, addr } => format!(
"{} {}, {}",
ljustify2("mov".to_string(), suffixBWLQ(*size)),
show_ireg_sized(*src, mb_rru, *size),
addr.show_rru(mb_rru)
),
Inst::Shift_R {
is_64,
kind,
num_bits,
dst,
} => match num_bits {
None => format!(
"{} %cl, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Some(num_bits) => format!(
"{} ${}, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)),
num_bits,
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
},
Inst::Cmp_RMI_R { size, src, dst } => format!(
"{} {}, {}",
ljustify2("cmp".to_string(), suffixBWLQ(*size)),
src.show_rru_sized(mb_rru, *size),
show_ireg_sized(*dst, mb_rru, *size)
),
Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
}
Inst::Pop64 { dst } => {
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
}
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
Inst::CallUnknown { dest } => format!(
"{} *{}",
ljustify("call".to_string()),
dest.show_rru(mb_rru)
),
Inst::Ret => "ret".to_string(),
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
Inst::JmpKnown { dest } => {
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
}
Inst::JmpCondSymm {
cc,
taken,
not_taken,
} => format!(
"{} taken={} not_taken={}",
ljustify2("j".to_string(), cc.to_string()),
taken.show_rru(mb_rru),
not_taken.show_rru(mb_rru)
),
//
Inst::JmpUnknown { target } => format!(
"{} *{}",
ljustify("jmp".to_string()),
target.show_rru(mb_rru)
),
}
}
}
// Temp hook for legacy printing machinery
impl fmt::Debug for Inst {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
// Print the insn without a Universe :-(
write!(fmt, "{}", self.show_rru(None))
}
}
fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
// This is a bit subtle. If some register is in the modified set, then it may not be in either
// the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
// regalloc.rs will "fix" this for us by removing the modified set from the use and def
// sets.
match inst {
// ** Nop
Inst::Alu_RMI_R {
is_64: _,
op: _,
src,
dst,
} => {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
dst,
} => {
collector.add_def(*dst);
}
Inst::Mov_R_R { is_64: _, src, dst } => {
collector.add_use(*src);
collector.add_def(*dst);
}
Inst::MovZX_M_R {
extMode: _,
addr,
dst,
} => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::Mov64_M_R { addr, dst } => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::MovSX_M_R {
extMode: _,
addr,
dst,
} => {
addr.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::Mov_R_M { size: _, src, addr } => {
collector.add_use(*src);
addr.get_regs_as_uses(collector);
}
Inst::Shift_R {
is_64: _,
kind: _,
num_bits,
dst,
} => {
if num_bits.is_none() {
collector.add_use(regs::rcx());
}
collector.add_mod(*dst);
}
Inst::Cmp_RMI_R { size: _, src, dst } => {
src.get_regs_as_uses(collector);
collector.add_use(*dst); // yes, really `add_use`
}
Inst::Push64 { src } => {
src.get_regs_as_uses(collector);
collector.add_mod(Writable::from_reg(regs::rsp()));
}
Inst::Pop64 { dst } => {
collector.add_def(*dst);
}
Inst::CallKnown {
dest: _,
uses: _,
defs: _,
} => {
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
unimplemented!();
}
Inst::CallUnknown { dest } => {
dest.get_regs_as_uses(collector);
}
Inst::Ret => {}
Inst::EpiloguePlaceholder => {}
Inst::JmpKnown { dest: _ } => {}
Inst::JmpCondSymm {
cc: _,
taken: _,
not_taken: _,
} => {}
//Inst::JmpUnknown { target } => {
// target.get_regs_as_uses(collector);
//}
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
}
}
//=============================================================================
// Instructions and subcomponents: map_regs
fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
if r.is_virtual() {
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
*r = new;
}
}
fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
if r.to_reg().is_virtual() {
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
*r = Writable::from_reg(new);
}
}
fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
if r.to_reg().is_virtual() {
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
*r = Writable::from_reg(new);
}
}
impl Addr {
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
match self {
Addr::IR {
simm32: _,
ref mut base,
} => map_use(map, base),
Addr::IRRS {
simm32: _,
ref mut base,
ref mut index,
shift: _,
} => {
map_use(map, base);
map_use(map, index);
}
}
}
}
impl RMI {
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
match self {
RMI::R { ref mut reg } => map_use(map, reg),
RMI::M { ref mut addr } => addr.map_uses(map),
RMI::I { simm32: _ } => {}
}
}
}
impl RM {
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
match self {
RM::R { ref mut reg } => map_use(map, reg),
RM::M { ref mut addr } => addr.map_uses(map),
}
}
}
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
// Note this must be carefully synchronized with x64_get_regs.
match inst {
// ** Nop
Inst::Alu_RMI_R {
is_64: _,
op: _,
ref mut src,
ref mut dst,
} => {
src.map_uses(mapper);
map_mod(mapper, dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
ref mut dst,
} => map_def(mapper, dst),
Inst::Mov_R_R {
is_64: _,
ref mut src,
ref mut dst,
} => {
map_use(mapper, src);
map_def(mapper, dst);
}
Inst::MovZX_M_R {
extMode: _,
ref mut addr,
ref mut dst,
} => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::Mov64_M_R { addr, dst } => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::MovSX_M_R {
extMode: _,
ref mut addr,
ref mut dst,
} => {
addr.map_uses(mapper);
map_def(mapper, dst);
}
Inst::Mov_R_M {
size: _,
ref mut src,
ref mut addr,
} => {
map_use(mapper, src);
addr.map_uses(mapper);
}
Inst::Shift_R {
is_64: _,
kind: _,
num_bits: _,
ref mut dst,
} => {
map_mod(mapper, dst);
}
Inst::Cmp_RMI_R {
size: _,
ref mut src,
ref mut dst,
} => {
src.map_uses(mapper);
map_use(mapper, dst);
}
Inst::Push64 { ref mut src } => src.map_uses(mapper),
Inst::Pop64 { ref mut dst } => {
map_def(mapper, dst);
}
Inst::CallKnown {
dest: _,
uses: _,
defs: _,
} => {}
Inst::CallUnknown { dest } => dest.map_uses(mapper),
Inst::Ret => {}
Inst::EpiloguePlaceholder => {}
Inst::JmpKnown { dest: _ } => {}
Inst::JmpCondSymm {
cc: _,
taken: _,
not_taken: _,
} => {}
//Inst::JmpUnknown { target } => {
// target.apply_map(mapper);
//}
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
}
}
//=============================================================================
// Instructions: misc functions and external interface
impl MachInst for Inst {
fn get_regs(&self, collector: &mut RegUsageCollector) {
x64_get_regs(&self, collector)
}
fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
x64_map_regs(self, mapper);
}
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
// out the upper 32 bits of the destination. For example, we could
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
// %reg.
match self {
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
_ => None,
}
}
fn is_epilogue_placeholder(&self) -> bool {
if let Self::EpiloguePlaceholder = self {
true
} else {
false
}
}
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
match self {
// Interesting cases.
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
&Self::JmpCondSymm {
cc: _,
taken,
not_taken,
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
// All other cases are boring.
_ => MachTerminator::None,
}
}
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
let rc_dst = dst_reg.to_reg().get_class();
let rc_src = src_reg.get_class();
// If this isn't true, we have gone way off the rails.
debug_assert!(rc_dst == rc_src);
match rc_dst {
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
_ => panic!("gen_move(x64): unhandled regclass"),
}
}
fn gen_zero_len_nop() -> Inst {
unimplemented!()
}
fn gen_nop(_preferred_size: usize) -> Inst {
unimplemented!()
}
fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
))),
}
}
fn gen_jump(label: MachLabel) -> Inst {
Inst::jmp_known(BranchTarget::Label(label))
}
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
let mut ret = SmallVec::new();
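// Constants that fit in 32 bits are materialized with a 32-bit `movl`, relying
// on the implicit zero-extension of 32-bit operations; larger constants use
// `movabsq`.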
let is64 = value > 0xffff_ffff;
ret.push(Inst::imm_r(is64, value, to_reg));
ret
}
fn reg_universe(flags: &Flags) -> RealRegUniverse {
create_reg_universe_systemv(flags)
}
fn worst_case_size() -> CodeOffset {
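// The architectural limit on the length of a single x86-64 instruction.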
15
}
type LabelUse = LabelUse;
}
impl MachInstEmit for Inst {
type State = ();
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
emit::emit(self, sink);
}
}
/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum LabelUse {
/// A 32-bit offset from location of relocation itself, added to the
/// existing value at that location.
Rel32,
}
impl MachInstLabelUse for LabelUse {
const ALIGN: CodeOffset = 1;
fn max_pos_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x7fff_ffff,
}
}
fn max_neg_range(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0x8000_0000,
}
}
fn patch_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 4,
}
}
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
match self {
LabelUse::Rel32 => {
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
let value = i32::try_from(label_offset)
.unwrap()
.wrapping_sub(i32::try_from(use_offset).unwrap())
.wrapping_add(addend);
buffer.copy_from_slice(&value.to_le_bytes()[..]);
}
}
}
fn supports_veneer(self) -> bool {
match self {
LabelUse::Rel32 => false,
}
}
fn veneer_size(self) -> CodeOffset {
match self {
LabelUse::Rel32 => 0,
}
}
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
match self {
LabelUse::Rel32 => {
panic!("Veneer not supported for Rel32 label-use.");
}
}
}
}

261
third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs vendored Normal file

@@ -0,0 +1,261 @@
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
use alloc::vec::Vec;
use std::string::String;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
use crate::machinst::pretty_print::ShowWithRRU;
use crate::settings;
// Hardware encodings for a few registers.
pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
pub const ENC_R15: u8 = 15;
fn gpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::I64, enc, index)
}
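// Note that in the `gpr` calls below, the first argument is the hardware
// encoding and the second is the register's index in the universe constructed
// at the bottom of this file; the two orderings intentionally differ.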
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 0)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 1)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 2)
}
pub(crate) fn r15() -> Reg {
gpr(ENC_R15, 3)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 4)
}
pub(crate) fn rsi() -> Reg {
gpr(6, 5)
}
pub(crate) fn rdi() -> Reg {
gpr(7, 6)
}
pub(crate) fn rax() -> Reg {
gpr(0, 7)
}
pub(crate) fn rcx() -> Reg {
gpr(1, 8)
}
pub(crate) fn rdx() -> Reg {
gpr(2, 9)
}
pub(crate) fn r8() -> Reg {
gpr(8, 10)
}
pub(crate) fn r9() -> Reg {
gpr(9, 11)
}
pub(crate) fn r10() -> Reg {
gpr(10, 12)
}
pub(crate) fn r11() -> Reg {
gpr(11, 13)
}
fn fpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::V128, enc, index)
}
fn xmm0() -> Reg {
fpr(0, 14)
}
fn xmm1() -> Reg {
fpr(1, 15)
}
fn xmm2() -> Reg {
fpr(2, 16)
}
fn xmm3() -> Reg {
fpr(3, 17)
}
fn xmm4() -> Reg {
fpr(4, 18)
}
fn xmm5() -> Reg {
fpr(5, 19)
}
fn xmm6() -> Reg {
fpr(6, 20)
}
fn xmm7() -> Reg {
fpr(7, 21)
}
fn xmm8() -> Reg {
fpr(8, 22)
}
fn xmm9() -> Reg {
fpr(9, 23)
}
fn xmm10() -> Reg {
fpr(10, 24)
}
fn xmm11() -> Reg {
fpr(11, 25)
}
fn xmm12() -> Reg {
fpr(12, 26)
}
fn xmm13() -> Reg {
fpr(13, 27)
}
fn xmm14() -> Reg {
fpr(14, 28)
}
fn xmm15() -> Reg {
fpr(15, 29)
}
pub(crate) fn rsp() -> Reg {
gpr(ENC_RSP, 30)
}
pub(crate) fn rbp() -> Reg {
gpr(ENC_RBP, 31)
}
/// Create the register universe for X64.
///
/// The ordering of registers matters, as explained in the module doc comment. At the moment this
/// assumes the SystemV calling convention.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
let mut regs = Vec::<(RealReg, String)>::new();
let mut allocable_by_class = [None; NUM_REG_CLASSES];
// Integer regs.
let mut base = regs.len();
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((r15().to_real_reg(), "%r15".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Caller-saved, in the SystemV x86_64 ABI.
regs.push((rsi().to_real_reg(), "%rsi".into()));
regs.push((rdi().to_real_reg(), "%rdi".into()));
regs.push((rax().to_real_reg(), "%rax".into()));
regs.push((rcx().to_real_reg(), "%rcx".into()));
regs.push((rdx().to_real_reg(), "%rdx".into()));
regs.push((r8().to_real_reg(), "%r8".into()));
regs.push((r9().to_real_reg(), "%r9".into()));
regs.push((r10().to_real_reg(), "%r10".into()));
regs.push((r11().to_real_reg(), "%r11".into()));
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(r12().get_index()),
});
// XMM registers
base = regs.len();
regs.push((xmm0().to_real_reg(), "%xmm0".into()));
regs.push((xmm1().to_real_reg(), "%xmm1".into()));
regs.push((xmm2().to_real_reg(), "%xmm2".into()));
regs.push((xmm3().to_real_reg(), "%xmm3".into()));
regs.push((xmm4().to_real_reg(), "%xmm4".into()));
regs.push((xmm5().to_real_reg(), "%xmm5".into()));
regs.push((xmm6().to_real_reg(), "%xmm6".into()));
regs.push((xmm7().to_real_reg(), "%xmm7".into()));
regs.push((xmm8().to_real_reg(), "%xmm8".into()));
regs.push((xmm9().to_real_reg(), "%xmm9".into()));
regs.push((xmm10().to_real_reg(), "%xmm10".into()));
regs.push((xmm11().to_real_reg(), "%xmm11".into()));
regs.push((xmm12().to_real_reg(), "%xmm12".into()));
regs.push((xmm13().to_real_reg(), "%xmm13".into()));
regs.push((xmm14().to_real_reg(), "%xmm14".into()));
regs.push((xmm15().to_real_reg(), "%xmm15".into()));
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(xmm15().get_index()),
});
// Other regs, not available to the allocator.
let allocable = regs.len();
regs.push((rsp().to_real_reg(), "%rsp".into()));
regs.push((rbp().to_real_reg(), "%rbp".into()));
RealRegUniverse {
regs,
allocable,
allocable_by_class,
}
}
/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
/// smaller size (4, 2 or 1 bytes).
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::I64 || size == 8 {
// We can't do any better.
return s;
}
if reg.is_real() {
// Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
// describe diplomatically as "a kludge", but it's only debug code.
let remapper = match s.as_str() {
"%rax" => Some(["%eax", "%ax", "%al"]),
"%rbx" => Some(["%ebx", "%bx", "%bl"]),
"%rcx" => Some(["%ecx", "%cx", "%cl"]),
"%rdx" => Some(["%edx", "%dx", "%dl"]),
"%rsi" => Some(["%esi", "%si", "%sil"]),
"%rdi" => Some(["%edi", "%di", "%dil"]),
"%rbp" => Some(["%ebp", "%bp", "%bpl"]),
"%rsp" => Some(["%esp", "%sp", "%spl"]),
"%r8" => Some(["%r8d", "%r8w", "%r8b"]),
"%r9" => Some(["%r9d", "%r9w", "%r9b"]),
"%r10" => Some(["%r10d", "%r10w", "%r10b"]),
"%r11" => Some(["%r11d", "%r11w", "%r11b"]),
"%r12" => Some(["%r12d", "%r12w", "%r12b"]),
"%r13" => Some(["%r13d", "%r13w", "%r13b"]),
"%r14" => Some(["%r14d", "%r14w", "%r14b"]),
"%r15" => Some(["%r15d", "%r15w", "%r15b"]),
_ => None,
};
if let Some(smaller_names) = remapper {
match size {
4 => s = smaller_names[0].into(),
2 => s = smaller_names[1].into(),
1 => s = smaller_names[2].into(),
_ => panic!("show_ireg_sized: real"),
}
}
} else {
// Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
let suffix = match size {
4 => "l",
2 => "w",
1 => "b",
_ => panic!("show_ireg_sized: virtual"),
};
s = s + suffix;
}
s
}

343
third_party/rust/cranelift-codegen/src/isa/x64/lower.rs vendored Normal file

@@ -0,0 +1,343 @@
//! Lowering rules for X64.
#![allow(dead_code)]
#![allow(non_snake_case)]
use regalloc::{Reg, Writable};
use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;
/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
//=============================================================================
// Helpers for instruction lowering.
fn is_int_ty(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 => true,
_ => false,
}
}
fn int_ty_to_is64(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 => false,
types::I64 => true,
_ => panic!("type {} is none of I8, I16, I32 or I64", ty),
}
}
fn int_ty_to_sizeB(ty: Type) -> u8 {
match ty {
types::I8 => 1,
types::I16 => 2,
types::I32 => 4,
types::I64 => 8,
_ => panic!("ity_to_sizeB"),
}
}
fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
let inst_data = ctx.data(iri);
if inst_data.opcode() == Opcode::Null {
Some(0)
} else {
match inst_data {
&InstructionData::UnaryImm { opcode: _, imm } => {
// Only has Into for i64; we use u64 elsewhere, so we cast.
let imm: i64 = imm.into();
Some(imm as u64)
}
_ => None,
}
}
}
fn inst_condcode(data: &InstructionData) -> IntCC {
match data {
&InstructionData::IntCond { cond, .. }
| &InstructionData::BranchIcmp { cond, .. }
| &InstructionData::IntCompare { cond, .. }
| &InstructionData::IntCondTrap { cond, .. }
| &InstructionData::BranchInt { cond, .. }
| &InstructionData::IntSelect { cond, .. }
| &InstructionData::IntCompareImm { cond, .. } => cond,
_ => panic!("inst_condcode(x64): unhandled: {:?}", data),
}
}
fn intCC_to_x64_CC(cc: IntCC) -> CC {
match cc {
IntCC::Equal => CC::Z,
IntCC::NotEqual => CC::NZ,
IntCC::SignedGreaterThanOrEqual => CC::NL,
IntCC::SignedGreaterThan => CC::NLE,
IntCC::SignedLessThanOrEqual => CC::LE,
IntCC::SignedLessThan => CC::L,
IntCC::UnsignedGreaterThanOrEqual => CC::NB,
IntCC::UnsignedGreaterThan => CC::NBE,
IntCC::UnsignedLessThanOrEqual => CC::BE,
IntCC::UnsignedLessThan => CC::B,
IntCC::Overflow => CC::O,
IntCC::NotOverflow => CC::NO,
}
}
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
let inputs = ctx.get_input(iri, input);
ctx.use_input_reg(inputs);
inputs.reg
}
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
ctx.get_output(iri, output)
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let op = ctx.data(iri).opcode();
let ty = if ctx.num_outputs(iri) == 1 {
Some(ctx.output_ty(iri, 0))
} else {
None
};
// This is all outstandingly feeble. TODO: much better!
match op {
Opcode::Iconst => {
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
// Get exactly the bit pattern in 'w64' into the dest. No
// monkeying with sign extension etc.
let dstIs64 = w64 > 0xFFFF_FFFF;
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
} else {
unimplemented!();
}
}
Opcode::Iadd | Opcode::Isub => {
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = int_ty_to_is64(ty.unwrap());
let how = if op == Opcode::Iadd {
RMI_R_Op::Add
} else {
RMI_R_Op::Sub
};
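// x64 ALU instructions are two-address (the destination is also a source), so
// first copy the LHS into the destination and then fold in the RHS.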
ctx.emit(Inst::mov_r_r(true, regL, regD));
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
}
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
// TODO: fold an immediate shift amount directly into the instruction, rather than always going through %cl.
let tySL = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
let regSL = input_to_reg(ctx, iri, 0);
let regSR = input_to_reg(ctx, iri, 1);
let regD = output_to_reg(ctx, iri, 0);
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
let how = match op {
Opcode::Ishl => ShiftKind::Left,
Opcode::Ushr => ShiftKind::RightZ,
Opcode::Sshr => ShiftKind::RightS,
_ => unreachable!(),
};
let is64 = tyD == types::I64;
let r_rcx = regs::rcx();
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
ctx.emit(Inst::mov_r_r(true, regSL, regD));
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
} else {
unimplemented!()
}
}
Opcode::Uextend | Opcode::Sextend => {
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
// don't accept a register source operand. They should be changed
// so as to have _RM_R form.
// TODO2: if the source operand is a load, incorporate that.
let isZX = op == Opcode::Uextend;
let tyS = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0);
let regS = input_to_reg(ctx, iri, 0);
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::mov_r_r(true, regS, regD));
match (tyS, tyD, isZX) {
(types::I8, types::I64, false) => {
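// Shift the byte up to the top of the register, then shift it back down
// arithmetically so its sign bit fills the upper 56 bits.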
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
}
_ => unimplemented!(),
}
}
Opcode::FallthroughReturn | Opcode::Return => {
for i in 0..ctx.num_inputs(iri) {
let src_reg = input_to_reg(ctx, iri, i);
let retval_reg = ctx.retval(i);
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
}
// N.B.: the Ret itself is generated by the ABI.
}
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm
| Opcode::SdivImm
| Opcode::UremImm
| Opcode::SremImm
| Opcode::IrsubImm
| Opcode::IaddCin
| Opcode::IaddIfcin
| Opcode::IaddCout
| Opcode::IaddIfcout
| Opcode::IaddCarry
| Opcode::IaddIfcarry
| Opcode::IsubBin
| Opcode::IsubIfbin
| Opcode::IsubBout
| Opcode::IsubIfbout
| Opcode::IsubBorrow
| Opcode::IsubIfborrow
| Opcode::BandImm
| Opcode::BorImm
| Opcode::BxorImm
| Opcode::RotlImm
| Opcode::RotrImm
| Opcode::IshlImm
| Opcode::UshrImm
| Opcode::SshrImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
}
}
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for X64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_insn_to_regs(ctx, ir_inst);
Ok(())
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
&self,
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
// unconditional branch or fallthrough. Otherwise, if only one branch,
// it may be an unconditional branch, a fallthrough, a return, or a
// trap. These conditions are verified by `is_ebb_basic()` during the
// verifier pass.
assert!(branches.len() <= 2);
let mut unimplemented = false;
if branches.len() == 2 {
// Must be a conditional branch followed by an unconditional branch.
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
println!(
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
op0, op1
);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rS = input_to_reg(ctx, branches[0], 0);
let cc = match op0 {
Opcode::Brz => CC::Z,
Opcode::Brnz => CC::NZ,
_ => unreachable!(),
};
let sizeB = int_ty_to_sizeB(tyS);
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
} else {
unimplemented = true;
}
}
Opcode::BrIcmp => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rSL = input_to_reg(ctx, branches[0], 0);
let rSR = input_to_reg(ctx, branches[0], 1);
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
let sizeB = int_ty_to_sizeB(tyS);
// FIXME verify rSR vs rSL ordering
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
} else {
unimplemented = true;
}
}
// TODO: Brif/icmp, Brff/icmp, jump tables
_ => {
unimplemented = true;
}
}
} else {
assert!(branches.len() == 1);
// Must be an unconditional branch or trap.
let op = ctx.data(branches[0]).opcode();
match op {
Opcode::Jump => {
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Trap => {
unimplemented = true;
}
_ => panic!("Unknown branch type!"),
}
}
if unimplemented {
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
}
Ok(())
}
}

112
third_party/rust/cranelift-codegen/src/isa/x64/mod.rs vendored Normal file

@@ -0,0 +1,112 @@
//! x86_64 Instruction Set Architecture.
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use target_lexicon::Triple;
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::pretty_print::ShowWithRRU;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self, Flags};
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
mod abi;
mod inst;
mod lower;
/// An X64 backend.
pub(crate) struct X64Backend {
triple: Triple,
flags: Flags,
reg_universe: RealRegUniverse,
}
impl X64Backend {
/// Create a new X64 backend with the given (shared) flags.
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
let reg_universe = create_reg_universe_systemv(&flags);
Self {
triple,
flags,
reg_universe,
}
}
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
compile::compile::<Self>(&func, self, abi)
}
}
impl MachBackend for X64Backend {
fn compile_function(
&self,
func: &Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let buffer = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
} else {
None
};
Ok(MachCompileResult {
buffer,
frame_size,
disasm,
})
}
fn flags(&self) -> &Flags {
&self.flags
}
fn name(&self) -> &'static str {
"x64"
}
fn triple(&self) -> Triple {
self.triple.clone()
}
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}
fn unsigned_add_overflow_condition(&self) -> IntCC {
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// overflow of an add.
IntCC::UnsignedGreaterThanOrEqual
}
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// underflow of a subtract (carry is borrow for subtract).
IntCC::UnsignedGreaterThanOrEqual
}
}
/// Create a new `isa::Builder`.
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
let backend = X64Backend::new_with_flags(triple, flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
}


@@ -6,7 +6,6 @@ use super::settings as isa_settings;
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use crate::cursor::{Cursor, CursorPosition, EncCursor};
use crate::ir;
use crate::ir::entities::StackSlot;
use crate::ir::immediates::Imm64;
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::ir::types;
@@ -19,7 +18,6 @@ use crate::regalloc::RegisterSet;
use crate::result::CodegenResult;
use crate::stack_layout::layout_stack;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::i32;
use target_lexicon::{PointerWidth, Triple};
@@ -44,7 +42,7 @@ static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
///
/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
/// convention reserves spill space for parameters, you dont have to use them as such"
const WIN_SHADOW_STACK_SPACE: i32 = 32;
const WIN_SHADOW_STACK_SPACE: StackSize = 32;
/// Stack alignment requirement for functions.
///
@@ -72,6 +70,7 @@ struct Args {
shared_flags: shared_settings::Flags,
#[allow(dead_code)]
isa_flags: isa_settings::Flags,
assigning_returns: bool,
}
impl Args {
@@ -82,12 +81,13 @@ impl Args {
call_conv: CallConv,
shared_flags: &shared_settings::Flags,
isa_flags: &isa_settings::Flags,
assigning_returns: bool,
) -> Self {
let offset = if call_conv.extends_windows_fastcall() {
WIN_SHADOW_STACK_SPACE
} else {
0
} as u32;
};
Self {
pointer_bytes: bits / 8,
@ -101,6 +101,7 @@ impl Args {
call_conv,
shared_flags: shared_flags.clone(),
isa_flags: isa_flags.clone(),
assigning_returns,
}
}
}
@ -109,6 +110,17 @@ impl ArgAssigner for Args {
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
let ty = arg.value_type;
if ty.bits() > u16::from(self.pointer_bits) {
if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
// "Any argument that doesn't fit in 8 bytes, or isn't
// 1, 2, 4, or 8 bytes, must be passed by reference"
return ValueConversion::Pointer(self.pointer_type).into();
} else if !ty.is_vector() && !ty.is_float() {
// On SystemV large integers and booleans are broken down to fit in a register.
return ValueConversion::IntSplit.into();
}
}
// Vectors should stay in vector registers unless SIMD is not enabled--then they are split
if ty.is_vector() {
if self.shared_flags.enable_simd() {
@ -119,11 +131,6 @@ impl ArgAssigner for Args {
return ValueConversion::VectorSplit.into();
}
// Large integers and booleans are broken down to fit in a register.
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
return ValueConversion::IntSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
match arg.extension {
@ -205,7 +212,7 @@ pub fn legalize_signature(
PointerWidth::U16 => panic!(),
PointerWidth::U32 => {
bits = 32;
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags);
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
}
PointerWidth::U64 => {
bits = 64;
@ -217,6 +224,7 @@ pub fn legalize_signature(
sig.call_conv,
shared_flags,
isa_flags,
false,
)
} else {
Args::new(
@ -226,6 +234,7 @@ pub fn legalize_signature(
sig.call_conv,
shared_flags,
isa_flags,
false,
)
};
}
@ -245,26 +254,20 @@ pub fn legalize_signature(
sig.call_conv,
shared_flags,
isa_flags,
true,
);
let sig_is_multi_return = sig.is_multi_return();
// If this is a multi-value return and we don't have enough available return
// registers to fit all of the return values, we need to backtrack and start
// If we don't have enough available return registers
// to fit all of the return values, we need to backtrack and start
// assigning locations all over again with a different strategy. In order to
// do that, we need a copy of the original assigner for the returns.
let backup_rets_for_struct_return = if sig_is_multi_return {
Some(rets.clone())
} else {
None
};
let mut backup_rets = rets.clone();
if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
if sig.is_multi_return()
&& new_returns
.iter()
.filter(|r| r.purpose == ArgumentPurpose::Normal)
.any(|r| !r.location.is_reg())
if new_returns
.iter()
.filter(|r| r.purpose == ArgumentPurpose::Normal)
.any(|r| !r.location.is_reg())
{
// The return values couldn't all fit into available return
// registers. Introduce the use of a struct-return parameter.
@ -276,6 +279,7 @@ pub fn legalize_signature(
purpose: ArgumentPurpose::StructReturn,
extension: ArgumentExtension::None,
location: ArgumentLoc::Unassigned,
legalized_to_pointer: false,
};
match args.assign(&ret_ptr_param) {
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
@ -285,8 +289,6 @@ pub fn legalize_signature(
_ => unreachable!("return pointer should always get a register assignment"),
}
let mut backup_rets = backup_rets_for_struct_return.unwrap();
// We're using the first return register for the return pointer (like
// sys v does).
let mut ret_ptr_return = AbiParam {
@ -294,6 +296,7 @@ pub fn legalize_signature(
purpose: ArgumentPurpose::StructReturn,
extension: ArgumentExtension::None,
location: ArgumentLoc::Unassigned,
legalized_to_pointer: false,
};
match backup_rets.assign(&ret_ptr_return) {
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
@ -501,7 +504,7 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
let word_size = StackSize::from(isa.pointer_bytes());
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
WIN_SHADOW_STACK_SPACE as u32
WIN_SHADOW_STACK_SPACE
} else {
0
};
@ -525,50 +528,60 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
}
let csrs = callee_saved_regs_used(isa, func);
// The reserved stack area is composed of:
// return address + frame pointer + all callee-saved registers + shadow space
// return address + frame pointer + all callee-saved registers
//
// Pushing the return address is an implicit function of the `call`
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
let word_size = isa.pointer_bytes() as usize;
let num_fprs = csrs.iter(FPR).len();
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
let csrs = callee_saved_regs_used(isa, func);
let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
// Only create an FPR stack slot if we're going to save FPRs.
let fpr_slot = if num_fprs > 0 {
// Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it
// seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be
// through `stack_load` and `stack_store` (see later comment about issue #1198). Even
// though in a certain light FPR preserve/restore is "spilling" an argument, regalloc
// implies that `SpillSlot` may be eligible for certain optimizations, and we know with
// certainty that this space may not be reused in the function, nor moved around.
Some(func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::ExplicitSlot,
size: (num_fprs * types::F64X2.bytes() as usize) as u32,
offset: None,
}))
} else {
None
};
// FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
if fpsr_stack_size > 0 {
csr_stack_size = (csr_stack_size + 15) & !15;
}
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
// since cranelift does not support spill slots before incoming args
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)),
size: csr_stack_size,
offset: Some(-(csr_stack_size as StackOffset)),
});
let is_leaf = func.is_leaf();
// If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
if !is_leaf {
// TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::ExplicitSlot,
size: WIN_SHADOW_STACK_SPACE,
offset: None,
});
}
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Subtract the GPR saved register size from the local size because pushes are used for the saves
let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
// Add CSRs to function signature
let reg_type = isa.pointer_type();
let sp_arg_index = if fpsr_stack_size > 0 {
let sp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::CalleeSaved,
RU::rsp as RegUnit,
);
let index = func.signature.params.len();
func.signature.params.push(sp_arg);
Some(index)
} else {
None
};
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
@ -601,19 +614,13 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
local_stack_size,
reg_type,
&csrs,
fpr_slot.as_ref(),
sp_arg_index.is_some(),
isa,
);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(
&mut pos,
local_stack_size,
reg_type,
&csrs,
fpr_slot.as_ref(),
);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
Ok(())
}
@ -649,6 +656,20 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
// Add CSRs to function signature
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
// On X86-32 all parameters, including vmctx, are passed on stack, and we need
// to extract vmctx from the stack before we can save the frame pointer.
let sp_arg_index = if isa.pointer_bits() == 32 {
let sp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::CalleeSaved,
RU::rsp as RegUnit,
);
let index = func.signature.params.len();
func.signature.params.push(sp_arg);
Some(index)
} else {
None
};
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
@ -666,11 +687,18 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
// Set up the cursor and insert the prologue
let entry_block = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
insert_common_prologue(
&mut pos,
local_stack_size,
reg_type,
&csrs,
sp_arg_index.is_some(),
isa,
);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, None);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
Ok(())
}
@ -682,9 +710,18 @@ fn insert_common_prologue(
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
has_sp_param: bool,
isa: &dyn TargetIsa,
) {
let sp = if has_sp_param {
let block = pos.current_block().expect("missing block under cursor");
let sp = pos.func.dfg.append_block_param(block, reg_type);
pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
Some(sp)
} else {
None
};
// If this is a leaf function with zero stack, then there's no need to
// insert a stack check since it can't overflow anything and
// forward-progress is guaranteed so long as loops are handled anyway.
@ -707,7 +744,7 @@ fn insert_common_prologue(
None => pos
.func
.stack_limit
.map(|gv| interpret_gv(pos, gv, scratch)),
.map(|gv| interpret_gv(pos, gv, sp, scratch)),
};
if let Some(stack_limit_arg) = stack_limit_arg {
insert_stack_check(pos, stack_size, stack_limit_arg);
@ -780,38 +817,27 @@ fn insert_common_prologue(
}
}
// Now that RSP is prepared for the function, we can use stack slots:
// With the stack pointer adjusted, save any callee-saved floating point registers via offset
// FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
let mut last_fpr_save = None;
if let Some(fpr_slot) = fpr_slot {
debug_assert!(csrs.iter(FPR).len() != 0);
// `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base
// address. We are well after postopt could run, so load the CSR region base once here,
// instead of hoping that the addr/store will be combined later.
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
for (i, reg) in csrs.iter(FPR).enumerate() {
// Append param to entry block
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
// function entry.
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
// Since regalloc has already run, we must assign a location.
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
let mut fpr_offset = 0;
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
+ (stack_size % types::F64X2.bytes() as i64);
for reg in csrs.iter(FPR) {
// Append param to entry Block
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
// Since regalloc has already run, we must assign a location.
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
last_fpr_save =
Some(
pos.ins()
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset),
);
fpr_offset += types::F64X2.bytes() as i32;
}
last_fpr_save = Some(pos.ins().store(
ir::MemFlags::trusted(),
csr_arg,
sp.expect("FPR save requires SP param"),
(stack_size - offset) as i32,
));
}
pos.func.prologue_end = Some(
@ -834,19 +860,55 @@ fn insert_common_prologue(
/// compared to the stack pointer, but currently it serves enough functionality
/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
/// bit over time!
fn interpret_gv(pos: &mut EncCursor, gv: ir::GlobalValue, scratch: ir::ValueLoc) -> ir::Value {
fn interpret_gv(
pos: &mut EncCursor,
gv: ir::GlobalValue,
sp: Option<ir::Value>,
scratch: ir::ValueLoc,
) -> ir::Value {
match pos.func.global_values[gv] {
ir::GlobalValueData::VMContext => pos
.func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("no vmcontext parameter found"),
ir::GlobalValueData::VMContext => {
let vmctx_index = pos
.func
.signature
.special_param_index(ir::ArgumentPurpose::VMContext)
.expect("no vmcontext parameter found");
match pos.func.signature.params[vmctx_index] {
AbiParam {
location: ArgumentLoc::Reg(_),
..
} => {
let entry = pos.func.layout.entry_block().unwrap();
pos.func.dfg.block_params(entry)[vmctx_index]
}
AbiParam {
location: ArgumentLoc::Stack(offset),
value_type,
..
} => {
let offset =
offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
// The following access can be marked `trusted` because it is a load of an argument. We
// know it is safe because it was safe to write it in preparing this function call.
let ret =
pos.ins()
.load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
pos.func.locations[ret] = scratch;
return ret;
}
AbiParam {
location: ArgumentLoc::Unassigned,
..
} => unreachable!(),
}
}
ir::GlobalValueData::Load {
base,
offset,
global_type,
readonly: _,
} => {
let base = interpret_gv(pos, base, scratch);
let base = interpret_gv(pos, base, sp, scratch);
let ret = pos
.ins()
.load(global_type, ir::MemFlags::trusted(), base, offset);
@ -911,13 +973,13 @@ fn insert_common_epilogues(
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
sp_arg_index: Option<usize>,
) {
while let Some(block) = pos.next_block() {
pos.goto_last_inst(block);
if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() {
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, fpr_slot);
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, sp_arg_index);
}
}
}
@ -931,56 +993,8 @@ fn insert_common_epilogue(
pos: &mut EncCursor,
reg_type: ir::types::Type,
csrs: &RegisterSet,
fpr_slot: Option<&StackSlot>,
sp_arg_index: Option<usize>,
) {
// Even though instructions to restore FPRs are inserted first, we have to append them after
// restored GPRs to satisfy parameter order in the return.
let mut restored_fpr_values = Vec::new();
// Restore FPRs before we move RSP and invalidate stack slots.
let mut first_fpr_load = None;
if let Some(fpr_slot) = fpr_slot {
debug_assert!(csrs.iter(FPR).len() != 0);
// `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base
// address. We are well after postopt could run, so load the CSR region base once here,
// instead of hoping that the addr/store will be combined later.
//
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
// function exit.
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
let mut fpr_offset = 0;
for reg in csrs.iter(FPR) {
let value = pos.ins().load(
types::F64X2,
ir::MemFlags::trusted(),
stack_addr,
fpr_offset,
);
fpr_offset += types::F64X2.bytes() as i32;
// Unlike GPRs before, we don't need to step back after each restoration because FPR
// restoration is order-insensitive. Furthermore: we want GPR restoration to begin
// after FPR restoration, so that stack adjustments occur after we're done relying on
// StackSlot validity.
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
restored_fpr_values.push(value);
}
}
let mut sp_adjust_inst = None;
if stack_size > 0 {
sp_adjust_inst = Some(pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)));
}
// Insert the pop of the frame pointer
let fp_pop = pos.ins().x86_pop(reg_type);
let fp_pop_inst = pos.prev_inst().unwrap();
@ -991,13 +1005,47 @@ fn insert_common_epilogue(
let mut first_csr_pop_inst = None;
for reg in csrs.iter(GPR) {
let csr_pop = pos.ins().x86_pop(reg_type);
first_csr_pop_inst = Some(pos.prev_inst().unwrap());
first_csr_pop_inst = pos.prev_inst();
assert!(first_csr_pop_inst.is_some());
pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_pop);
}
for value in restored_fpr_values.into_iter() {
pos.func.dfg.append_inst_arg(inst, value);
// Insert the adjustment of SP
let mut sp_adjust_inst = None;
if stack_size > 0 {
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
sp_adjust_inst = pos.prev_inst();
assert!(sp_adjust_inst.is_some());
}
let mut first_fpr_load = None;
if let Some(index) = sp_arg_index {
let sp = pos
.func
.dfg
.block_params(pos.func.layout.entry_block().unwrap())[index];
// Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
for (i, reg) in csrs.iter(FPR).enumerate() {
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
+ (stack_size % types::F64X2.bytes() as i64);
let value = pos.ins().load(
types::F64X2,
ir::MemFlags::trusted(),
sp,
(stack_size - offset) as i32,
);
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, value);
}
} else {
assert!(csrs.iter(FPR).len() == 0);
}
pos.func.epilogues_start.push(


@ -13,6 +13,7 @@ use crate::isa::encoding::base_size;
use crate::isa::encoding::{Encoding, RecipeSizing};
use crate::isa::RegUnit;
use crate::isa::{self, TargetIsa};
use crate::legalizer::expand_as_libcall;
use crate::predicates;
use crate::regalloc::RegDiversions;
@ -246,6 +247,20 @@ fn size_with_inferred_rex_for_inreg0_inreg1(
sizing.base_size + if needs_rex { 1 } else { 0 }
}
/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
fn size_with_inferred_rex_for_inreg1_inreg2(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
// No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|| test_input(2, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
/// Infers whether a dynamic REX prefix will be emitted, based on a single
/// input register and a single output register.
fn size_with_inferred_rex_for_inreg0_outreg0(
@ -1181,10 +1196,10 @@ fn convert_extractlane(
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::ExtractLane {
if let ir::InstructionData::BinaryImm8 {
opcode: ir::Opcode::Extractlane,
arg,
lane,
imm: lane,
} = pos.func.dfg[inst]
{
// NOTE: the following legalization assumes that the upper bits of the XMM register do
@ -1237,10 +1252,10 @@ fn convert_insertlane(
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::InsertLane {
if let ir::InstructionData::TernaryImm8 {
opcode: ir::Opcode::Insertlane,
args: [vector, replacement],
lane,
imm: lane,
} = pos.func.dfg[inst]
{
let value_type = pos.func.dfg.value_type(vector);
@ -1255,7 +1270,7 @@ fn convert_insertlane(
pos.func
.dfg
.replace(inst)
.x86_insertps(vector, immediate, replacement)
.x86_insertps(vector, replacement, immediate)
}
F64X2 => {
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
@ -1283,7 +1298,7 @@ fn convert_insertlane(
pos.func
.dfg
.replace(inst)
.x86_pinsr(vector, lane, replacement);
.x86_pinsr(vector, replacement, lane);
}
}
}
@ -1318,6 +1333,39 @@ fn convert_ineg(
}
}
fn expand_dword_to_xmm<'f>(
pos: &mut FuncCursor<'_>,
arg: ir::Value,
arg_type: ir::Type,
) -> ir::Value {
if arg_type == I64 {
let (arg_lo, arg_hi) = pos.ins().isplit(arg);
let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
let arg = pos.ins().insertlane(arg, arg_hi, 1);
let arg = pos.ins().raw_bitcast(I64X2, arg);
arg
} else {
pos.ins().bitcast(I64X2, arg)
}
}
fn contract_dword_from_xmm<'f>(
pos: &mut FuncCursor<'f>,
inst: ir::Inst,
ret: ir::Value,
ret_type: ir::Type,
) {
if ret_type == I64 {
let ret = pos.ins().raw_bitcast(I32X4, ret);
let ret_lo = pos.ins().extractlane(ret, 0);
let ret_hi = pos.ins().extractlane(ret, 1);
pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
} else {
let ret = pos.ins().extractlane(ret, 0);
pos.func.dfg.replace(inst).ireduce(ret_type, ret);
}
}
// Masks for i8x16 unsigned right shift.
static USHR_MASKS: [u8; 128] = [
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@ -1379,7 +1427,24 @@ fn convert_ushr(
} else if arg0_type.is_vector() {
// x86 has encodings for these shifts.
pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
} else if arg0_type == I64 {
// 64 bit shifts need to be legalized on x86_32.
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.has_sse41() {
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
let shifted = pos.ins().x86_psrl(value, amount);
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
} else {
// otherwise legalize to libcall
expand_as_libcall(inst, func, isa);
}
} else {
// Everything else should be already legal.
unreachable!()
}
}
@ -1446,12 +1511,76 @@ fn convert_ishl(
} else if arg0_type.is_vector() {
// x86 has encodings for these shifts.
pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
} else if arg0_type == I64 {
// 64 bit shifts need to be legalized on x86_32.
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.has_sse41() {
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
let shifted = pos.ins().x86_psll(value, amount);
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
} else {
// otherwise legalize to libcall
expand_as_libcall(inst, func, isa);
}
} else {
// Everything else should be already legal.
unreachable!()
}
}
}
/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
fn convert_i64x2_imul(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Binary {
opcode: ir::Opcode::Imul,
args: [arg0, arg1],
} = pos.func.dfg[inst]
{
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty == I64X2 {
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
// If we have certain AVX512 features, we can lower this instruction simply.
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
} else {
// Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
// 64-bit lane into 32-bit high and low sections using shifting and then performs
// the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
// concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1).
let high0 = pos.ins().ushr_imm(arg0, 32);
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
let high1 = pos.ins().ushr_imm(arg1, 32);
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
let addhigh = pos.ins().iadd(mul0, mul1);
let high = pos.ins().ishl_imm(addhigh, 32);
let low = pos.ins().x86_pmuludq(arg0, arg1);
pos.func.dfg.replace(inst).iadd(low, high);
}
} else {
unreachable!(
"{} should be encodable; it cannot be legalized by convert_i64x2_imul",
pos.func.dfg.display_inst(inst, None)
);
}
}
}
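For reference, the SSE2 fallback above is 64-bit schoolbook multiplication truncated to the low 64 bits of each lane: writing a lane pair as a = 2^32·a_hi + a_lo and b = 2^32·b_hi + b_lo,

    a·b mod 2^64 = ((a_hi·b_lo + b_hi·a_lo) << 32) + a_lo·b_lo,

since the a_hi·b_hi contribution is a multiple of 2^64 and drops out. The three `x86_pmuludq` results are exactly the 32×32→64 partial products a_hi·b_lo, b_hi·a_lo, and a_lo·b_lo consumed by the emitted sequence.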
fn expand_tls_value(
inst: ir::Inst,
func: &mut ir::Function,


@ -23,6 +23,7 @@ use crate::result::CodegenResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use core::any::Any;
use core::fmt;
use target_lexicon::{PointerWidth, Triple};
@ -53,12 +54,23 @@ fn isa_constructor(
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
let isa_flags = settings::Flags::new(&shared_flags, builder);
if isa_flags.use_new_backend() {
#[cfg(not(feature = "x64"))]
panic!("new backend x86 support not included by cargo features!");
#[cfg(feature = "x64")]
super::x64::isa_builder(triple).finish(shared_flags)
} else {
Box::new(Isa {
triple,
isa_flags,
shared_flags,
cpumode: level1,
})
}
}
impl TargetIsa for Isa {
@ -173,6 +185,10 @@ impl TargetIsa for Isa {
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(unwind::systemv::create_cie())
}
fn as_any(&self) -> &dyn Any {
self as &dyn Any
}
}
impl fmt::Display for Isa {


@ -28,22 +28,7 @@ pub(crate) fn create_unwind_info(
let mut prologue_size = 0;
let mut unwind_codes = Vec::new();
let mut found_end = false;
// Have we saved at least one FPR? if so, we might have to check additional constraints.
let mut saved_fpr = false;
// In addition to the min offset for a callee-save, we need to know the offset from the
// frame base to the stack pointer, so that we can record an unwind offset that spans only
// to the end of callee-save space.
let mut static_frame_allocation_size = 0u32;
// For the time being, FPR preservation is split into a stack_addr and later store/load.
// Store the register used for stack store and ensure it is the same register with no
// intervening changes to the frame size.
let mut callee_save_region_reg = None;
// Also record the callee-save region's offset from RSP, because it must be added to FPR
// save offsets to compute an offset from the frame base.
let mut callee_save_offset = None;
let mut xmm_save_count: u8 = 0;
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
// x64 ABI prologues cannot exceed 255 bytes in length
@ -60,8 +45,6 @@ pub(crate) fn create_unwind_info(
InstructionData::Unary { opcode, arg } => {
match opcode {
Opcode::X86Push => {
static_frame_allocation_size += 8;
unwind_codes.push(UnwindCode::PushRegister {
offset: unwind_offset,
reg: GPR.index_of(func.locations[arg].unwrap_reg()) as u8,
@ -70,7 +53,6 @@ pub(crate) fn create_unwind_info(
Opcode::AdjustSpDown => {
let stack_size =
stack_size.expect("expected a previous stack size instruction");
static_frame_allocation_size += stack_size;
// This is used when calling a stack check function
// We need to track the assignment to RAX which has the size of the stack
@ -85,10 +67,6 @@ pub(crate) fn create_unwind_info(
InstructionData::CopySpecial { src, dst, .. } => {
if let Some(frame_register) = frame_register {
if src == (RU::rsp as RegUnit) && dst == frame_register {
// Constructing an rbp-based stack frame, so the static frame
// allocation restarts at 0 from here.
static_frame_allocation_size = 0;
unwind_codes.push(UnwindCode::SetFramePointer {
offset: unwind_offset,
sp_offset: 0,
@ -113,7 +91,7 @@ pub(crate) fn create_unwind_info(
let imm: i64 = imm.into();
assert!(imm <= core::u32::MAX as i64);
static_frame_allocation_size += imm as u32;
stack_size = Some(imm as u32);
unwind_codes.push(UnwindCode::StackAlloc {
offset: unwind_offset,
@ -123,52 +101,27 @@ pub(crate) fn create_unwind_info(
_ => {}
}
}
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset: _,
} => {
let result = func.dfg.inst_results(inst).get(0).unwrap();
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
callee_save_region_reg = Some(frame_reg);
// Figure out the offset in the call frame that `frame_reg` will have.
let frame_size = func
.stack_slots
.layout_info
.expect("func's stack slots have layout info if stack operations exist")
.frame_size;
// Because we're well after the prologue has been constructed, stack slots
// must have been laid out...
let slot_offset = func.stack_slots[stack_slot]
.offset
.expect("callee-save slot has an offset computed");
let frame_offset = frame_size as i32 + slot_offset;
callee_save_offset = Some(frame_offset as u32);
}
}
InstructionData::Store {
opcode: Opcode::Store,
args: [arg1, arg2],
flags: _flags,
offset,
..
} => {
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
(func.locations[arg1], func.locations[arg2])
{
if Some(base_ru) == callee_save_region_reg {
let offset_int: i32 = offset.into();
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR preservation requires an FPR save region, which has some stack offset");
if FPR.contains(ru) {
saved_fpr = true;
unwind_codes.push(UnwindCode::SaveXmm {
offset: unwind_offset,
reg: ru as u8,
stack_offset: offset_int,
});
}
// If this is a save of an FPR, record an unwind operation
// Note: the stack_offset here is relative to an adjusted SP
// This will be fixed up later to be based on the frame pointer offset
if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
let offset: i32 = offset.into();
unwind_codes.push(UnwindCode::SaveXmm {
offset: unwind_offset,
reg: src as u8,
stack_offset: offset as u32,
});
xmm_save_count += 1;
}
}
}
@ -183,41 +136,45 @@ pub(crate) fn create_unwind_info(
assert!(found_end);
if saved_fpr {
if static_frame_allocation_size > 240 && saved_fpr {
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
This is a Cranelift implementation limit, see \
https://github.com/bytecodealliance/wasmtime/issues/1475",
static_frame_allocation_size);
return Err(CodegenError::ImplLimitExceeded);
// When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
// register minus some offset, forming a "base address". This attempts to calculate the frame register offset
// while updating the XMM save offsets to be relative from this "base address" rather than RSP.
let mut frame_register_offset = 0;
if frame_register.is_some() && xmm_save_count > 0 {
// Determine the number of 16-byte slots used for all CSRs (including GPRs)
// The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
// Assumption: each FPR is stored at a lower address than the previous one
let mut last_stack_offset = None;
let mut fpr_save_count: u8 = 0;
let mut gpr_push_count: u8 = 0;
for code in unwind_codes.iter_mut() {
match code {
UnwindCode::SaveXmm { stack_offset, .. } => {
if let Some(last) = last_stack_offset {
assert!(last > *stack_offset);
}
last_stack_offset = Some(*stack_offset);
fpr_save_count += 1;
*stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
}
UnwindCode::PushRegister { .. } => {
gpr_push_count += 1;
}
_ => {}
}
}
// Only test static frame size is 16-byte aligned when an FPR is saved to avoid
// panicking when alignment is elided because no FPRs are saved and no child calls are
// made.
assert!(
static_frame_allocation_size % 16 == 0,
"static frame allocation must be a multiple of 16"
);
}
assert_eq!(fpr_save_count, xmm_save_count);
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
// Windows x64 SEH only allows this offset be up to 240 bytes, however, meaning large
// frames are inexpressible, and we cannot actually compile the function. In case there are
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
// will actually check it. This, then, avoids panics when compiling functions with large
// call frames.
let reported_frame_offset = if saved_fpr {
(static_frame_allocation_size / 16) as u8
} else {
0
};
// Account for alignment space when there's an odd number of GPR pushes
// Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
}
Ok(Some(UnwindInfo {
flags: 0, // this assumes cranelift functions have no SEH handlers
prologue_size: prologue_size as u8,
frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
frame_register_offset: reported_frame_offset,
frame_register_offset,
unwind_codes,
}))
}
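As a worked example of the rewriting above (counts chosen for illustration, not taken from this patch): with three GPR pushes and two XMM saves, the two `SaveXmm` codes get rewritten stack offsets of 16 and 0 ((2 - 1) · 16 and (2 - 2) · 16), and `frame_register_offset` becomes 2 + (3 + 1) / 2 = 4, i.e. four 16-byte slots between the frame register and the FPR save area, with the odd number of 8-byte pushes rounded up to a whole 16-byte slot (8 bytes of alignment padding).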
@ -284,7 +241,7 @@ mod tests {
},
UnwindCode::StackAlloc {
offset: 9,
size: 64 + 32
size: 64
}
]
}
@ -303,7 +260,7 @@ mod tests {
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
0x05, // Frame register + offset (RBP with 0 offset)
0x09, // Prolog offset
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset
@ -349,7 +306,7 @@ mod tests {
},
UnwindCode::StackAlloc {
offset: 27,
size: 10000 + 32
size: 10000
}
]
}
@ -369,8 +326,8 @@ mod tests {
0x05, // Frame register + offset (RBP with 0 offset)
0x1B, // Prolog offset
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
0xE6, // Low size byte
0x04, // High size byte (e.g. 0x04E6 * 8 = 100032 (10000 + 32) bytes)
0xE2, // Low size byte
0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset
@ -414,7 +371,7 @@ mod tests {
},
UnwindCode::StackAlloc {
offset: 27,
size: 1000000 + 32
size: 1000000
}
]
}
@ -434,10 +391,10 @@ mod tests {
0x05, // Frame register + offset (RBP with 0 offset)
0x1B, // Prolog offset
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
0x60, // Byte 1 of size
0x40, // Byte 1 of size
0x42, // Byte 2 of size
0x0F, // Byte 3 of size
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
0x05, // Prolog offset
0x03, // Operation 3 (save frame register), stack pointer offset = 0
0x02, // Prolog offset


@ -504,6 +504,13 @@ where
// this value.
pos.ins().with_results([into_result]).ireduce(ty, arg)
}
// ABI argument is a pointer to the value we want.
ValueConversion::Pointer(abi_ty) => {
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins()
.with_results([into_result])
.load(ty, MemFlags::new(), arg, 0)
}
}
}
@ -563,6 +570,18 @@ fn convert_to_abi<PutArg>(
let arg = pos.ins().uextend(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
ValueConversion::Pointer(abi_ty) => {
// Note: This conversion can only happen for call arguments,
// so we can allocate the value on stack safely.
let stack_slot = pos.func.create_stack_slot(StackSlotData {
kind: StackSlotKind::ExplicitSlot,
size: ty.bytes(),
offset: None,
});
let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0);
pos.ins().store(MemFlags::new(), value, arg, 0);
convert_to_abi(pos, cfg, arg, put_arg);
}
}
}
@ -757,12 +776,6 @@ pub fn handle_call_abi(
{
legalize_sret_call(isa, pos, sig_ref, inst);
} else {
// OK, we need to fix the call arguments to match the ABI signature.
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.dfg.signatures[sig_ref].params[abi_arg]
});
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
inst = legalize_inst_results(pos, |func, abi_res| {
func.dfg.signatures[sig_ref].returns[abi_res]
@ -770,6 +783,13 @@ pub fn handle_call_abi(
}
}
// Go back and fix the call arguments to match the ABI signature.
pos.goto_inst(inst);
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.dfg.signatures[sig_ref].params[abi_arg]
});
debug_assert!(
check_call_signature(&pos.func.dfg, inst).is_ok(),
"Signature still wrong: {}, {}{}",
@ -814,7 +834,12 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
pos.use_srcloc(inst);
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.signature.returns[abi_arg]
let arg = func.signature.returns[abi_arg];
debug_assert!(
!arg.legalized_to_pointer,
"Return value cannot be legalized to pointer"
);
arg
});
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
// the legalized signature. These values should simply be propagated from the entry block


@ -35,7 +35,7 @@ mod table;
use self::call::expand_call;
use self::globalvalue::expand_global_value;
use self::heap::expand_heap_addr;
use self::libcall::expand_as_libcall;
pub(crate) use self::libcall::expand_as_libcall;
use self::table::expand_table_addr;
enum LegalizeInstResult {


@ -99,12 +99,12 @@ mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod num_uses;
mod partition_slice;
mod postopt;
mod predicates;
mod redundant_reload_remover;
mod regalloc;
mod remove_constant_phis;
mod result;
mod scoped_hash_map;
mod simple_gvn;
@ -114,6 +114,9 @@ mod topo_order;
mod unreachable_code;
mod value_label;
#[cfg(feature = "enable-peepmatic")]
mod peepmatic;
pub use crate::result::{CodegenError, CodegenResult};
/// Version number of this crate.


@ -12,6 +12,15 @@ pub trait ABIBody {
/// The instruction type for the ISA associated with this ABI.
type I: VCodeInst;
/// Does the ABI-body code need a temp reg? One will be provided to `init()`
/// as the `maybe_tmp` arg if so.
fn temp_needed(&self) -> bool;
/// Initialize. This is called after the ABIBody is constructed because it
/// may be provided with a temp vreg, which can only be allocated once the
/// lowering context exists.
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
/// Get the settings controlling this function's compilation.
fn flags(&self) -> &settings::Flags;
@ -34,6 +43,13 @@ pub trait ABIBody {
/// register.
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
/// Generate any setup instruction needed to save values to the
/// return-value area. This is usually used when there are multiple return
/// values or an otherwise large return value that must be passed on the
/// stack; typically the ABI specifies an extra hidden argument that is a
/// pointer to that memory.
fn gen_retval_area_setup(&self) -> Option<Self::I>;
/// Generate an instruction which copies a source register to a return value slot.
fn gen_copy_reg_to_retval(
&self,
@ -98,7 +114,10 @@ pub trait ABIBody {
fn gen_epilogue(&self) -> Vec<Self::I>;
/// Returns the full frame size for the given function, after prologue emission has run. This
/// comprises the spill space, incoming argument space, alignment padding, etc.
/// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
/// Baldrdash (SpiderMonkey integration).
fn frame_size(&self) -> u32;
/// Get the spill-slot size.
@ -132,24 +151,29 @@ pub trait ABICall {
/// Get the number of arguments expected.
fn num_args(&self) -> usize;
/// Copy an argument value from a source register, prior to the call.
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
/// Emit a copy of an argument value from a source register, prior to the call.
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
&self,
ctx: &mut C,
idx: usize,
from_reg: Reg,
) -> Vec<Self::I>;
);
/// Copy a return value into a destination register, after the call returns.
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
/// Emit a copy a return value into a destination register, after the call returns.
fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
&self,
ctx: &mut C,
idx: usize,
into_reg: Writable<Reg>,
);
/// Pre-adjust the stack, prior to argument copies and call.
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
/// Emit code to pre-adjust the stack, prior to argument copies and call.
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
/// Post-adjust the stack, after call return and return-value copies.
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
/// Emit code to post-adjust the stack, after call return and return-value copies.
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
/// Generate the call itself.
/// Emit the call itself.
///
/// The returned instruction should have proper use- and def-sets according
/// to the argument registers, return-value registers, and clobbered
@ -159,5 +183,8 @@ pub trait ABICall {
/// registers are also logically defs, but should never be read; their
/// values are "defined" (to the regalloc) but "undefined" in every other
/// sense.)
fn gen_call(&self) -> Vec<Self::I>;
///
/// This function should only be called once, as it is allowed to re-use
/// parts of the ABICall object in emitting instructions.
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
}
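To make the intended protocol concrete, here is a minimal sketch of the order in which a lowering backend is expected to drive these hooks; the helper function and its argument/return slices are assumptions for illustration, not an API this patch adds.

```rust
// Illustrative sketch only -- not part of this patch. Assumes `ABICall` and
// `LowerCtx` from crate::machinst and `regalloc::{Reg, Writable}` are in scope.
fn emit_call_sequence<A, C>(abi: &mut A, ctx: &mut C, args: &[Reg], rets: &[Writable<Reg>])
where
    A: ABICall,
    C: LowerCtx<I = A::I>,
{
    assert_eq!(args.len(), abi.num_args());
    // Reserve any outgoing-argument stack space first.
    abi.emit_stack_pre_adjust(ctx);
    // Move each argument value into its ABI-assigned register or stack slot.
    for (i, &arg) in args.iter().enumerate() {
        abi.emit_copy_reg_to_arg(ctx, i, arg);
    }
    // The call itself; its use/def/clobber sets come from the callee signature.
    abi.emit_call(ctx);
    // Copy return values out of their ABI locations.
    for (i, &ret) in rets.iter().enumerate() {
        abi.emit_copy_retval_to_reg(ctx, i, ret);
    }
    // Release the outgoing-argument space.
    abi.emit_stack_post_adjust(ctx);
}
```

Emitting through `LowerCtx` instead of returning `Vec<Self::I>` lets the ABI object allocate temporaries and produce multi-instruction sequences directly, which is also why `emit_call` takes `&mut self` and should be called only once.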


@ -10,6 +10,7 @@ use crate::settings::Flags;
#[cfg(feature = "testing_hooks")]
use crate::regalloc::RegDiversions;
use core::any::Any;
use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;
@ -127,4 +128,8 @@ impl TargetIsa for TargetIsaAdapter {
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
self.backend.unsigned_sub_overflow_condition()
}
fn as_any(&self) -> &dyn Any {
self as &dyn Any
}
}


@ -1,59 +1,624 @@
//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//! CLIF block 0
//! / \
//! (edge 0->1) (edge 0->2)
//! | |
//! CLIF block 1 CLIF block 2
//! \ /
//! (edge 1->3) (edge 2->3)
//! \ /
//! CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//! +--------------+
//! | CLIF block 0 |
//! +--------------+
//! / \
//! +--------------+ +--------------+
//! | (edge 0->1) | |(edge 0->2) |
//! | CLIF block 1 | | CLIF block 2 |
//! +--------------+ +--------------+
//! \ /
//! +-----------+ +-----------+
//! |(edge 1->3)| |(edge 2->3)|
//! +-----------+ +-----------+
//! \ /
//! +------------+
//! |CLIF block 3|
//! +------------+
//! ```
//!
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
//! blocks' original bodies, but the out-edges could not because for simplicity
//! in the successor-function definition, we only ever merge an edge onto one
//! side of an original CLIF block.)
//!
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
//! block prepended or appended with an edge block (never both, though), or just
//! an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::ir::{Block, Function, Inst, Opcode};
use crate::machinst::lower::visit_block_succs;
use crate::machinst::*;
use regalloc::{BlockIx, Function};
/// Simple reverse postorder-based block order emission.
///
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
/// construction algorithm.
struct BlockRPO {
visited: Vec<bool>,
postorder: Vec<BlockIndex>,
deferred_last: Option<BlockIndex>,
use log::debug;
use smallvec::SmallVec;
/// Mapping from CLIF BBs to VCode BBs.
#[derive(Debug)]
pub struct BlockLoweringOrder {
/// Lowered blocks, in BlockIndex order. Each block is some combination of
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
/// see [LoweredBlock] for details.
lowered_order: Vec<LoweredBlock>,
/// Successors for all lowered blocks, in one serialized vector. Indexed by
/// the ranges in `lowered_succ_ranges`.
lowered_succs: Vec<(Inst, LoweredBlock)>,
/// BlockIndex values for successors for all lowered blocks, in the same
/// order as `lowered_succs`.
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
/// Ranges in `lowered_succs` giving the successor lists for each lowered
/// block. Indexed by lowering-order index (`BlockIndex`).
lowered_succ_ranges: Vec<(usize, usize)>,
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
/// blocks.
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
}
impl BlockRPO {
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
BlockRPO {
visited: vec![false; vcode.num_blocks()],
postorder: vec![],
deferred_last: None,
/// The origin of a block in the lowered block-order: either an original CLIF
/// block, or an inserted edge-block, or a combination of the two if an edge is
/// non-critical.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoweredBlock {
/// Block in original CLIF, with no merged edge-blocks.
Orig {
/// Original CLIF block.
block: Block,
},
/// Block in the original CLIF, plus edge-block to one succ (which is the
/// one successor of the original block).
OrigAndEdge {
/// The original CLIF block contained in this lowered block.
block: Block,
/// The edge (jump) instruction transitioning from this block
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The successor CLIF block.
succ: Block,
},
/// Block in the original CLIF, preceded by edge-block from one pred (which
/// is the one pred of the original block).
EdgeAndOrig {
/// The previous CLIF block, i.e., the edge block's predecessor.
pred: Block,
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The original CLIF block included in this lowered block.
block: Block,
},
/// Split critical edge between two CLIF blocks. This lowered block does not
/// correspond to any original CLIF blocks; it only serves as an insertion
/// point for work to happen on the transition from `pred` to `succ`.
Edge {
/// The predecessor CLIF block.
pred: Block,
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor CLIF block.
succ: Block,
},
}
impl LoweredBlock {
/// The associated original (CLIF) block included in this lowered block, if
/// any.
pub fn orig_block(self) -> Option<Block> {
match self {
LoweredBlock::Orig { block, .. }
| LoweredBlock::OrigAndEdge { block, .. }
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
LoweredBlock::Edge { .. } => None,
}
}
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
self.visited[block as usize] = true;
for succ in vcode.succs(block) {
if !self.visited[*succ as usize] {
self.visit(vcode, *succ);
/// The associated in-edge, if any.
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
} => Some((pred, edge_inst, block)),
_ => None,
}
}
/// The associated out-edge, if any. Also includes edge-only blocks.
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
} => Some((pred, edge_inst, succ)),
_ => None,
}
}
}
impl BlockLoweringOrder {
/// Compute and return a lowered block order for `f`.
pub fn new(f: &Function) -> BlockLoweringOrder {
debug!("BlockLoweringOrder: function body {:?}", f);
// Step 1: compute the in-edge and out-edge count of every block.
let mut block_in_count = SecondaryMap::with_default(0);
let mut block_out_count = SecondaryMap::with_default(0);
// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut fallthrough_return_block = None;
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
visit_block_succs(f, block, |inst, succ| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ));
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
for inst in f.layout.block_likely_branches(block) {
if f.dfg[inst].opcode() == Opcode::Return {
// Implicit output edge for any return.
block_out_count[block] += 1;
}
if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
// Fallthrough return block must come last.
debug_assert!(fallthrough_return_block == None);
fallthrough_return_block = Some(block);
}
}
}
// Implicit input edge for entry block.
if let Some(entry) = f.layout.entry_block() {
block_in_count[entry] += 1;
}
// Here we define the implicit CLIF-plus-edges graph. There are
// conceptually two such graphs: the original, with every edge explicit,
// and the merged one, with blocks (represented by `LoweredBlock`
// values) that contain original CLIF blocks, edges, or both. This
// function returns a lowered block's successors as per the latter, with
// consideration to edge-block merging.
//
// Note that there is a property of the block-merging rules below
// that is very important to ensure we don't miss any lowered blocks:
// any block in the implicit CLIF-plus-edges graph will *only* be
// included in one block in the merged graph.
//
// This, combined with the property that every edge block is reachable
// only from one predecessor (and hence cannot be reached by a DFS
// backedge), means that it is sufficient in our DFS below to track
// visited-bits per original CLIF block only, not per edge. This greatly
// simplifies the data structures (no need to keep a sparse hash-set of
// (block, block) tuples).
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
let start_idx = ret.len();
match block {
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
block: succ,
},
));
} else {
ret.push((
edge_inst,
LoweredBlock::Edge {
pred: block,
edge_inst,
succ,
},
));
}
}
}
LoweredBlock::Edge {
succ, edge_inst, ..
}
| LoweredBlock::OrigAndEdge {
succ, edge_inst, ..
} => {
// At an edge block; successors are always orig blocks,
// possibly with edge blocks following.
if block_out_count[succ] == 1 {
let range = block_succ_range[succ];
// check if the one succ is a real CFG edge (vs.
// implicit return succ).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ: succ_succ,
},
));
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
} else {
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
}
}
}
let end_idx = ret.len();
(start_idx, end_idx)
};
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
let mut lowered_succs = vec![];
let mut lowered_succ_indices = vec![];
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
// explicit stack so we don't overflow the real stack with a deep DFS.
#[derive(Debug)]
struct StackEntry {
this: LoweredBlock,
succs: (usize, usize), // range in lowered_succs
cur_succ: usize, // index in lowered_succs
}
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
let mut visited = FxHashSet::default();
let mut postorder = vec![];
if let Some(entry) = f.layout.entry_block() {
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
// to not special-case the entry block here.
let block = LoweredBlock::Orig { block: entry };
visited.insert(block);
let range = compute_lowered_succs(&mut lowered_succs, block);
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: block,
succs: range,
cur_succ: range.1,
});
}
let mut deferred_last = None;
while !stack.is_empty() {
let stack_entry = stack.last_mut().unwrap();
let range = stack_entry.succs;
if stack_entry.cur_succ == range.0 {
let orig_block = stack_entry.this.orig_block();
if orig_block.is_some() && orig_block == fallthrough_return_block {
deferred_last = Some((stack_entry.this, range));
} else {
postorder.push((stack_entry.this, range));
}
stack.pop();
} else {
// Heuristic: chase the children in reverse. This puts the first
// successor block first in RPO, all other things being equal,
// which tends to prioritize loop backedges over out-edges,
// putting the edge-block closer to the loop body and minimizing
// live-ranges in linear instruction space.
let next = lowered_succs[stack_entry.cur_succ - 1].1;
stack_entry.cur_succ -= 1;
if visited.contains(&next) {
continue;
}
visited.insert(next);
let range = compute_lowered_succs(&mut lowered_succs, next);
lowered_succ_indices.resize(lowered_succs.len(), 0);
stack.push(StackEntry {
this: next,
succs: range,
cur_succ: range.1,
});
}
}
for i in vcode.block_insns(BlockIx::new(block)) {
if vcode.get_insn(i).is_epilogue_placeholder() {
debug_assert!(self.deferred_last.is_none());
self.deferred_last = Some(block);
return;
postorder.reverse();
let mut rpo = postorder;
if let Some(d) = deferred_last {
rpo.push(d);
}
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
let mut lowered_order = vec![];
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
}
let lowered_succ_indices = lowered_succs
.iter()
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
.collect();
let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = i as BlockIndex;
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
}
self.postorder.push(block);
let result = BlockLoweringOrder {
lowered_order,
lowered_succs,
lowered_succ_indices,
lowered_succ_ranges,
orig_map,
};
debug!("BlockLoweringOrder: {:?}", result);
result
}
/// Get the lowered order of blocks.
pub fn lowered_order(&self) -> &[LoweredBlock] {
&self.lowered_order[..]
}
/// Get the successors for a lowered block, by index in `lowered_order()`'s
/// returned slice. Each successor is paired with the edge-instruction
/// (branch) corresponding to this edge.
pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
let range = self.lowered_succ_ranges[block as usize];
&self.lowered_succs[range.0..range.1]
}
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block as usize];
&self.lowered_succ_indices[range.0..range.1]
}
/// Get the lowered block index containing a CLIF block, if any. (May not be
/// present if the original CLIF block was unreachable.)
pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
self.orig_map[bb]
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
assert!(n_blocks > 0);
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let blocks = (0..n_blocks)
.map(|i| {
let bb = func.dfg.make_block();
assert!(bb.as_u32() == i as u32);
bb
})
.collect::<Vec<_>>();
let arg0 = func.dfg.append_block_param(blocks[0], I32);
let mut pos = FuncCursor::new(&mut func);
let mut edge = 0;
for i in 0..n_blocks {
pos.insert_block(blocks[i]);
let mut succs = vec![];
while edge < edges.len() && edges[edge].0 == i {
succs.push(edges[edge].1);
edge += 1;
}
if succs.len() == 0 {
pos.ins().return_(&[arg0]);
} else if succs.len() == 1 {
pos.ins().jump(blocks[succs[0]], &[]);
} else if succs.len() == 2 {
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
pos.ins().jump(blocks[succs[1]], &[]);
} else {
panic!("Too many successors");
}
}
func
}
#[test]
fn test_blockorder_diamond() {
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 6);
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[2].orig_block().is_none());
assert!(order.lowered_order[2].in_edge().is_none());
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[3].out_edge().is_none());
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[5].in_edge().is_none());
assert!(order.lowered_order[5].out_edge().is_none());
}
#[test]
fn test_blockorder_critedge() {
// 0
// / \
// 1 2
// / \ \
// 3 4 |
// |\ _|____|
// | \/ |
// | /\ |
// 5 6
//
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
//
let func = build_test_func(
7,
&[
(0, 1),
(0, 2),
(1, 3),
(1, 4),
(2, 5),
(3, 5),
(3, 6),
(4, 6),
],
);
let order = BlockLoweringOrder::new(&func);
assert_eq!(order.lowered_order.len(), 11);
println!("ordered = {:?}", order.lowered_order);
// block 0
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
assert!(order.lowered_order[0].in_edge().is_none());
assert!(order.lowered_order[0].out_edge().is_none());
// edge 0->1 + block 1
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
assert!(order.lowered_order[1].out_edge().is_none());
// edge 1->3 + block 3
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
assert!(order.lowered_order[2].out_edge().is_none());
// edge 3->5
assert!(order.lowered_order[3].orig_block().is_none());
assert!(order.lowered_order[3].in_edge().is_none());
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
// edge 3->6
assert!(order.lowered_order[4].orig_block().is_none());
assert!(order.lowered_order[4].in_edge().is_none());
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
// edge 1->4 + block 4
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
assert!(order.lowered_order[5].out_edge().is_none());
// edge 4->6
assert!(order.lowered_order[6].orig_block().is_none());
assert!(order.lowered_order[6].in_edge().is_none());
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
// block 6
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
assert!(order.lowered_order[7].in_edge().is_none());
assert!(order.lowered_order[7].out_edge().is_none());
// edge 0->2 + block 2
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
assert!(order.lowered_order[8].out_edge().is_none());
// edge 2->5
assert!(order.lowered_order[9].orig_block().is_none());
assert!(order.lowered_order[9].in_edge().is_none());
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
// block 5
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
assert!(order.lowered_order[10].in_edge().is_none());
assert!(order.lowered_order[10].out_edge().is_none());
}
}
/// Compute the final block order.
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
let mut rpo = BlockRPO::new(vcode);
rpo.visit(vcode, vcode.entry());
rpo.rpo()
}
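
As a side note on Step 2 above: the explicit-stack postorder walk can be sketched in isolation on a plain adjacency-list graph. The following is a minimal, hypothetical illustration (toy `usize` node ids rather than the CLIF/edge-block types, and without the reverse-successor heuristic or the deferred fallthrough-return handling):

// Reverse postorder over `succs` (successor lists), starting at `entry`,
// using an explicit work stack so a deep CFG cannot overflow the call stack.
fn reverse_postorder(succs: &[Vec<usize>], entry: usize) -> Vec<usize> {
    // Each stack entry is (node, index of the next successor to visit).
    let mut stack: Vec<(usize, usize)> = vec![(entry, 0)];
    let mut visited = vec![false; succs.len()];
    visited[entry] = true;
    let mut postorder = Vec::new();
    while !stack.is_empty() {
        let top = stack.len() - 1;
        let (node, next_succ) = stack[top];
        if next_succ == succs[node].len() {
            // All successors finished: emit in postorder and pop.
            postorder.push(node);
            stack.pop();
        } else {
            // Advance this entry's cursor, then descend if unvisited.
            stack[top].1 += 1;
            let succ = succs[node][next_succ];
            if !visited[succ] {
                visited[succ] = true;
                stack.push((succ, 0));
            }
        }
    }
    postorder.reverse(); // reversed postorder is RPO
    postorder
}

fn main() {
    // Diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
    let succs = vec![vec![1, 2], vec![3], vec![3], vec![]];
    let rpo = reverse_postorder(&succs, 0);
    assert_eq!(rpo[0], 0); // the entry block always comes first
    assert_eq!(*rpo.last().unwrap(), 3); // the join block comes last
    println!("{:?}", rpo);
}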

1522
third_party/rust/cranelift-codegen/src/machinst/buffer.rs vendored Normal file

The diff between these files is not shown because of its large size. Load diff

View file

@ -6,11 +6,11 @@ use crate::settings;
use crate::timing;
use log::debug;
use regalloc::{allocate_registers, RegAllocAlgorithm};
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
pub fn compile<B: LowerBackend>(
pub fn compile<B: LowerBackend + MachBackend>(
f: &Function,
b: &B,
abi: Box<dyn ABIBody<I = B::MInst>>,
@ -18,29 +18,46 @@ pub fn compile<B: LowerBackend>(
where
B::MInst: ShowWithRRU,
{
// This lowers the CL IR.
let mut vcode = Lower::new(f, abi)?.lower(b)?;
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = Lower::new(f, abi, block_order)?;
// Lower the IR.
let mut vcode = lower.lower(b)?;
let universe = &B::MInst::reg_universe(vcode.flags());
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
debug!(
"vcode from lowering: \n{}",
vcode.show_rru(Some(b.reg_universe()))
);
// Perform register allocation.
let algorithm = match vcode.flags().regalloc() {
settings::Regalloc::Backtracking => RegAllocAlgorithm::Backtracking,
settings::Regalloc::BacktrackingChecked => RegAllocAlgorithm::BacktrackingChecked,
settings::Regalloc::ExperimentalLinearScan => RegAllocAlgorithm::LinearScan,
let (run_checker, algorithm) = match vcode.flags().regalloc() {
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
settings::Regalloc::BacktrackingChecked => {
(true, Algorithm::Backtracking(Default::default()))
}
settings::Regalloc::ExperimentalLinearScan => {
(false, Algorithm::LinearScan(Default::default()))
}
settings::Regalloc::ExperimentalLinearScanChecked => {
(true, Algorithm::LinearScan(Default::default()))
}
};
let result = {
let _tt = timing::regalloc();
allocate_registers(
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
allocate_registers_with_opts(
&mut vcode,
b.reg_universe(),
Options {
run_checker,
algorithm,
},
)
.map_err(|err| {
debug!(
"Register allocation error for vcode\n{}\nError: {:?}",
vcode.show_rru(Some(universe)),
vcode.show_rru(Some(b.reg_universe())),
err
);
err
@ -52,14 +69,9 @@ where
// all at once. This also inserts prologues/epilogues.
vcode.replace_insns_from_regalloc(result);
vcode.remove_redundant_branches();
// Do final passes over code to finalize branches.
vcode.finalize_branches();
debug!(
"vcode after regalloc: final version:\n{}",
vcode.show_rru(Some(universe))
vcode.show_rru(Some(b.reg_universe()))
);
Ok(vcode)

The diff between these files is not shown because of its large size. Load diff

View file

@ -109,6 +109,7 @@ use regalloc::RegUsageCollector;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use smallvec::SmallVec;
use std::string::String;
use target_lexicon::Triple;
@ -124,8 +125,8 @@ pub mod abi;
pub use abi::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod sections;
pub use sections::*;
pub mod buffer;
pub use buffer::*;
pub mod adapter;
pub use adapter::*;
@ -137,7 +138,7 @@ pub trait MachInst: Clone + Debug {
/// Map virtual registers to physical registers using the given virt->phys
/// maps corresponding to the program points prior to, and after, this instruction.
fn map_regs(&mut self, maps: &RegUsageMapper);
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
/// If this is a simple move, return the (source, destination) tuple of registers.
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
@ -152,6 +153,9 @@ pub trait MachInst: Clone + Debug {
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
/// Generate a constant into a reg.
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>;
/// Generate a zero-length no-op.
fn gen_zero_len_nop() -> Self;
@ -166,7 +170,7 @@ pub trait MachInst: Clone + Debug {
/// Generate a jump to another target. Used during lowering of
/// control flow.
fn gen_jump(target: BlockIndex) -> Self;
fn gen_jump(target: MachLabel) -> Self;
/// Generate a NOP. The `preferred_size` parameter allows the caller to
/// request a NOP of that size, or as close to it as possible. The machine
@ -175,17 +179,6 @@ pub trait MachInst: Clone + Debug {
/// the instruction must have a nonzero size.
fn gen_nop(preferred_size: usize) -> Self;
/// Rewrite block targets using the block-target map.
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
/// Finalize branches once the block order (fallthrough) is known.
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
/// Update instruction once block offsets are known. These offsets are
/// relative to the beginning of the function. `targets` is indexed by
/// BlockIndex.
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
/// Get the register universe for this backend.
fn reg_universe(flags: &Flags) -> RealRegUniverse;
@ -194,6 +187,54 @@ pub trait MachInst: Clone + Debug {
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
offset
}
/// What is the worst-case instruction size emitted by this instruction type?
fn worst_case_size() -> CodeOffset;
/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
type LabelUse: MachInstLabelUse;
}
/// A descriptor of a label reference (use) in an instruction set.
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
/// Required alignment for any veneer. Usually the required instruction
/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
const ALIGN: CodeOffset;
/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
/// label-reference fixup at offset `x` is valid if the label resolves to `x
/// + 1024`.
fn max_pos_range(self) -> CodeOffset;
/// What is the maximum PC-relative range (negative)? This is the absolute
/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
/// valid if the label resolves to `x - 1024`.
fn max_neg_range(self) -> CodeOffset;
/// What is the size of code-buffer slice this label-use needs to patch in
/// the label's value?
fn patch_size(self) -> CodeOffset;
/// Perform a code-patch, given the offset into the buffer of this label use
/// and the offset into the buffer of the label's definition.
/// It is guaranteed that the label will resolve to an offset within the
/// range `[use_offset - self.max_neg_range(), use_offset +
/// self.max_pos_range()]`.
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
/// Can the label-use be patched to a veneer that supports a longer range?
/// Usually valid for jumps (a short-range jump can jump to a longer-range
/// jump), but not for e.g. constant pool references, because the constant
/// load would require different code (one more level of indirection).
fn supports_veneer(self) -> bool;
/// How many bytes are needed for a veneer?
fn veneer_size(self) -> CodeOffset;
/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
/// bytes long at offset `veneer_offset` in the buffer. The original
/// label-use will be patched to refer to this veneer's offset. A new
/// (offset, LabelUse) is returned that allows the veneer to use the actual
/// label. For veneers to work properly, it is expected that the new veneer
/// has a larger range; on most platforms this probably means either a
/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
/// stage, a jump that supports a full 32-bit range, for example.
fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
}
/// Describes a block terminator (not call) in the vcode, when its branches
@ -205,24 +246,26 @@ pub enum MachTerminator<'a> {
/// A return instruction.
Ret,
/// An unconditional branch to another block.
Uncond(BlockIndex),
Uncond(MachLabel),
/// A conditional branch to one of two other blocks.
Cond(BlockIndex, BlockIndex),
Cond(MachLabel, MachLabel),
/// An indirect branch with known possible targets.
Indirect(&'a [BlockIndex]),
Indirect(&'a [MachLabel]),
}
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> {
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
type State: Default + Clone + Debug;
/// Emit the instruction.
fn emit(&self, code: &mut O, flags: &Flags);
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
}
/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
/// Machine code.
pub sections: MachSections,
pub buffer: MachBufferFinalized,
/// Size of stack frame, in bytes.
pub frame_size: u32,
/// Disassembly, if requested.
@ -232,7 +275,7 @@ pub struct MachCompileResult {
impl MachCompileResult {
/// Get a `CodeInfo` describing section sizes from this compilation result.
pub fn code_info(&self) -> CodeInfo {
let code_size = self.sections.total_size();
let code_size = self.buffer.total_size();
CodeInfo {
code_size,
jumptables_size: 0,
@ -262,17 +305,13 @@ pub trait MachBackend {
fn name(&self) -> &'static str;
/// Return the register universe for this backend.
fn reg_universe(&self) -> RealRegUniverse;
fn reg_universe(&self) -> &RealRegUniverse;
/// Machine-specific condcode info needed by TargetIsa.
fn unsigned_add_overflow_condition(&self) -> IntCC {
// TODO: this is what x86 specifies. Is this right for arm64?
IntCC::UnsignedLessThan
}
/// Condition that will be true when an IaddIfcout overflows.
fn unsigned_add_overflow_condition(&self) -> IntCC;
/// Machine-specific condcode info needed by TargetIsa.
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// TODO: this is what x86 specifies. Is this right for arm64?
IntCC::UnsignedLessThan
}
/// Condition that will be true when an IsubIfcout overflows.
fn unsigned_sub_overflow_condition(&self) -> IntCC;
}
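
To make the `MachInstLabelUse` contract above more concrete, here is a small, hypothetical stand-in for one label-use kind: a 32-bit PC-relative reference that range-checks the branch distance and patches four little-endian bytes in place. Real backends encode the distance inside an instruction word, so treat this only as a sketch of the range/patch bookkeeping:

// Toy 32-bit PC-relative label use (illustration only, not a real backend's kind).
struct PcRel32;

impl PcRel32 {
    fn max_pos_range(&self) -> u32 { 0x7fff_ffff } // label up to ~2 GiB ahead
    fn max_neg_range(&self) -> u32 { 0x8000_0000 } // label up to 2 GiB behind
    fn patch_size(&self) -> u32 { 4 }

    // Patch the 4 bytes at `use_offset` with the signed distance to `label_offset`.
    fn patch(&self, buffer: &mut [u8], use_offset: u32, label_offset: u32) {
        let distance = label_offset as i64 - use_offset as i64;
        assert!(distance <= self.max_pos_range() as i64);
        assert!(distance >= -(self.max_neg_range() as i64));
        let start = use_offset as usize;
        let end = start + self.patch_size() as usize;
        buffer[start..end].copy_from_slice(&(distance as i32).to_le_bytes());
    }
}

fn main() {
    let mut code = vec![0u8; 16];
    // A reference at offset 4 whose label is later bound at offset 12.
    PcRel32.patch(&mut code, 4, 12);
    assert_eq!(i32::from_le_bytes([code[4], code[5], code[6], code[7]]), 8);
}

A veneer, in these terms, is just a longer-range jump emitted nearby, so that an out-of-range reference can be patched to target the veneer instead of the label itself.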

View file

@ -1,460 +0,0 @@
//! In-memory representation of compiled machine code, in multiple sections
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
//! simultaneously, so we buffer the result in memory and hand off to the
//! caller at the end of compilation.
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use alloc::vec::Vec;
/// A collection of sections with defined start-offsets.
pub struct MachSections {
/// Sections, in offset order.
pub sections: Vec<MachSection>,
}
impl MachSections {
/// New, empty set of sections.
pub fn new() -> MachSections {
MachSections { sections: vec![] }
}
/// Add a section with a known offset and size. Returns the index.
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
let idx = self.sections.len();
self.sections.push(MachSection::new(start, length));
idx
}
/// Mutably borrow the given section by index.
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
&mut self.sections[idx]
}
/// Get mutable borrows of two sections simultaneously. Used during
/// instruction emission to provide references to the .text and .rodata
/// (constant pool) sections.
pub fn two_sections<'a>(
&'a mut self,
idx1: usize,
idx2: usize,
) -> (&'a mut MachSection, &'a mut MachSection) {
assert!(idx1 < idx2);
assert!(idx1 < self.sections.len());
assert!(idx2 < self.sections.len());
let (first, rest) = self.sections.split_at_mut(idx2);
(&mut first[idx1], &mut rest[0])
}
/// Emit this set of sections to a set of sinks for the code,
/// relocations, traps, and stackmap.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
// N.B.: we emit every section into the .text section as far as
// the `CodeSink` is concerned; we do not bother to segregate
// the contents into the actual program text, the jumptable and the
// rodata (constant pool). This allows us to generate code assuming
// that these will not be relocated relative to each other, and avoids
// having to designate each section as belonging in one of the three
// fixed categories defined by `CodeSink`. If this becomes a problem
// later (e.g. because of memory permissions or similar), we can
// add this designation and segregate the output; take care, however,
// to add the appropriate relocations in this case.
for section in &self.sections {
if section.data.len() > 0 {
while sink.offset() < section.start_offset {
sink.put1(0);
}
section.emit(sink);
}
}
sink.begin_jumptables();
sink.begin_rodata();
sink.end_codegen();
}
/// Get a list of source location mapping tuples in sorted-by-start-offset order.
pub fn get_srclocs_sorted<'a>(&'a self) -> MachSectionsSrcLocs<'a> {
MachSectionsSrcLocs::new(&self.sections)
}
/// Get the total required size for these sections.
pub fn total_size(&self) -> CodeOffset {
if self.sections.len() == 0 {
0
} else {
// Find the last non-empty section.
self.sections
.iter()
.rev()
.find(|s| s.data.len() > 0)
.map(|s| s.cur_offset_from_start())
.unwrap_or(0)
}
}
}
/// An iterator over the srclocs in each section.
/// Returns MachSrcLocs in an order sorted by start location.
pub struct MachSectionsSrcLocs<'a> {
sections: &'a [MachSection],
cur_section: usize,
cur_srcloc: usize,
// For validation:
last_offset: CodeOffset,
}
impl<'a> MachSectionsSrcLocs<'a> {
fn new(sections: &'a [MachSection]) -> MachSectionsSrcLocs<'a> {
MachSectionsSrcLocs {
sections,
cur_section: 0,
cur_srcloc: 0,
last_offset: 0,
}
}
}
impl<'a> Iterator for MachSectionsSrcLocs<'a> {
type Item = &'a MachSrcLoc;
fn next(&mut self) -> Option<&'a MachSrcLoc> {
// We simply iterate through sections and srcloc records in order. This produces a
// sorted order naturally because sections are in starting-offset-order, and srclocs
// are produced as a section is emitted into, so are in order as well.
// If we're out of sections, we're done.
if self.cur_section >= self.sections.len() {
return None;
}
// Otherwise, make sure we have a srcloc in the current section left to return, and
// advance to the next section if not. Done if we run out of sections.
while self.cur_srcloc >= self.sections[self.cur_section].srclocs.len() {
self.cur_srcloc = 0;
self.cur_section += 1;
if self.cur_section >= self.sections.len() {
return None;
}
}
let loc = &self.sections[self.cur_section].srclocs[self.cur_srcloc];
self.cur_srcloc += 1;
debug_assert!(loc.start >= self.last_offset);
self.last_offset = loc.start;
Some(loc)
}
}
/// An abstraction over MachSection and MachSectionSize: some
/// receiver of section data.
pub trait MachSectionOutput {
/// Get the current offset from the start of all sections.
fn cur_offset_from_start(&self) -> CodeOffset;
/// Get the start offset of this section.
fn start_offset(&self) -> CodeOffset;
/// Add 1 byte to the section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the section.
fn put2(&mut self, value: u16) {
let [b0, b1] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
}
/// Add 4 bytes to the section.
fn put4(&mut self, value: u32) {
let [b0, b1, b2, b3] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
}
/// Add 8 bytes to the section.
fn put8(&mut self, value: u64) {
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
self.put1(b0);
self.put1(b1);
self.put1(b2);
self.put1(b3);
self.put1(b4);
self.put1(b5);
self.put1(b6);
self.put1(b7);
}
/// Add a slice of bytes to the section.
fn put_data(&mut self, data: &[u8]);
/// Add a relocation at the current offset.
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
/// Add a trap record at the current offset.
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
/// Add a call return address record at the current offset.
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
/// Start the output for the given source-location at the current offset.
fn start_srcloc(&mut self, loc: SourceLoc);
/// End the output for the previously-given source-location at the current offset.
fn end_srcloc(&mut self);
/// Align up to the given alignment.
fn align_to(&mut self, align_to: CodeOffset) {
assert!(align_to.is_power_of_two());
while self.cur_offset_from_start() & (align_to - 1) != 0 {
self.put1(0);
}
}
}
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
/// once, after computing the length of the code, so that constant references
/// can use known offsets as instructions are emitted.
pub struct MachSection {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The limit of this section, defined by the start of the next section.
pub length_limit: CodeOffset,
/// The section contents, as raw bytes.
pub data: Vec<u8>,
/// Any relocations referring to this section.
pub relocs: Vec<MachReloc>,
/// Any trap records referring to this section.
pub traps: Vec<MachTrap>,
/// Any call site records referring to this section.
pub call_sites: Vec<MachCallSite>,
/// Any source location mappings referring to this section.
pub srclocs: Vec<MachSrcLoc>,
/// The current source location in progress (after `start_srcloc()` and before `end_srcloc()`).
/// This is a (start_offset, src_loc) tuple.
pub cur_srcloc: Option<(CodeOffset, SourceLoc)>,
}
impl MachSection {
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
MachSection {
start_offset,
length_limit,
data: vec![],
relocs: vec![],
traps: vec![],
call_sites: vec![],
srclocs: vec![],
cur_srcloc: None,
}
}
/// Emit this section to the CodeSink and other associated sinks. The
/// current offset of the CodeSink must match the starting offset of this
/// section.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
assert!(sink.offset() == self.start_offset);
let mut next_reloc = 0;
let mut next_trap = 0;
let mut next_call_site = 0;
for (idx, byte) in self.data.iter().enumerate() {
if next_reloc < self.relocs.len() {
let reloc = &self.relocs[next_reloc];
if reloc.offset == idx as CodeOffset {
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
next_reloc += 1;
}
}
if next_trap < self.traps.len() {
let trap = &self.traps[next_trap];
if trap.offset == idx as CodeOffset {
sink.trap(trap.code, trap.srcloc);
next_trap += 1;
}
}
if next_call_site < self.call_sites.len() {
let call_site = &self.call_sites[next_call_site];
if call_site.ret_addr == idx as CodeOffset {
sink.add_call_site(call_site.opcode, call_site.srcloc);
next_call_site += 1;
}
}
sink.put1(*byte);
}
}
}
impl MachSectionOutput for MachSection {
fn cur_offset_from_start(&self) -> CodeOffset {
self.start_offset + self.data.len() as CodeOffset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, value: u8) {
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
self.data.push(value);
}
fn put_data(&mut self, data: &[u8]) {
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
self.data.extend_from_slice(data);
}
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
let name = name.clone();
self.relocs.push(MachReloc {
offset: self.data.len() as CodeOffset,
srcloc,
kind,
name,
addend,
});
}
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
self.traps.push(MachTrap {
offset: self.data.len() as CodeOffset,
srcloc,
code,
});
}
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
self.call_sites.push(MachCallSite {
ret_addr: self.data.len() as CodeOffset,
srcloc,
opcode,
});
}
fn start_srcloc(&mut self, loc: SourceLoc) {
self.cur_srcloc = Some((self.cur_offset_from_start(), loc));
}
fn end_srcloc(&mut self) {
let (start, loc) = self
.cur_srcloc
.take()
.expect("end_srcloc() called without start_srcloc()");
let end = self.cur_offset_from_start();
// Skip zero-length extents.
debug_assert!(end >= start);
if end > start {
self.srclocs.push(MachSrcLoc { start, end, loc });
}
}
}
/// A MachSectionOutput implementation that records only size.
pub struct MachSectionSize {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The current offset of this section.
pub offset: CodeOffset,
}
impl MachSectionSize {
/// Create a new size-counting dummy section.
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
MachSectionSize {
start_offset,
offset: start_offset,
}
}
/// Return the size this section would take if emitted with a real sink.
pub fn size(&self) -> CodeOffset {
self.offset - self.start_offset
}
}
impl MachSectionOutput for MachSectionSize {
fn cur_offset_from_start(&self) -> CodeOffset {
// All size-counting sections conceptually start at offset 0; this doesn't
// matter when counting code size.
self.offset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, _: u8) {
self.offset += 1;
}
fn put_data(&mut self, data: &[u8]) {
self.offset += data.len() as CodeOffset;
}
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
fn start_srcloc(&mut self, _: SourceLoc) {}
fn end_srcloc(&mut self) {}
}
/// A relocation resulting from a compilation.
pub struct MachReloc {
/// The offset at which the relocation applies, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The kind of relocation.
pub kind: Reloc,
/// The external symbol / name to which this relocation refers.
pub name: ExternalName,
/// The addend to add to the symbol value.
pub addend: i64,
}
/// A trap record resulting from a compilation.
pub struct MachTrap {
/// The offset at which the trap instruction occurs, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The trap code.
pub code: TrapCode,
}
/// A call site record resulting from a compilation.
pub struct MachCallSite {
/// The offset of the call's return address, *relative to the containing section*.
pub ret_addr: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The call's opcode.
pub opcode: Opcode,
}
/// A source-location mapping resulting from a compilation.
#[derive(Clone, Debug)]
pub struct MachSrcLoc {
/// The start of the region of code corresponding to a source location.
/// This is relative to the start of the function, not to the start of the
/// section.
pub start: CodeOffset,
/// The end of the region of code corresponding to a source location.
/// This is relative to the start of the function, not to the start of the
/// section.
pub end: CodeOffset,
/// The source location.
pub loc: SourceLoc,
}
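
The `start_srcloc`/`end_srcloc` pair above is a simple open/close protocol around the current emission offset, with zero-length regions dropped. A stripped-down, hypothetical version of that bookkeeping (plain `u32`s standing in for `CodeOffset` and `SourceLoc`):

#[derive(Debug, PartialEq)]
struct SrcRange { start: u32, end: u32, loc: u32 }

#[derive(Default)]
struct SrcLocTracker {
    open: Option<(u32, u32)>, // (start offset, loc) of the currently open region
    ranges: Vec<SrcRange>,
}

impl SrcLocTracker {
    fn start(&mut self, offset: u32, loc: u32) {
        assert!(self.open.is_none(), "start() called while a region is open");
        self.open = Some((offset, loc));
    }
    fn end(&mut self, offset: u32) {
        let (start, loc) = self.open.take().expect("end() called without start()");
        if offset > start {
            // Skip zero-length regions, as the section code above does.
            self.ranges.push(SrcRange { start, end: offset, loc });
        }
    }
}

fn main() {
    let mut t = SrcLocTracker::default();
    t.start(0, 100);
    t.end(8); // eight bytes were emitted under source location 100
    t.start(8, 101);
    t.end(8); // nothing emitted: the zero-length region is dropped
    assert_eq!(t.ranges, vec![SrcRange { start: 0, end: 8, loc: 100 }]);
}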

View file

@ -17,9 +17,7 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
use crate::entity::SecondaryMap;
use crate::ir;
use crate::ir::SourceLoc;
use crate::ir::{self, SourceLoc};
use crate::machinst::*;
use crate::settings;
@ -30,9 +28,7 @@ use regalloc::{
};
use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use alloc::{borrow::Cow, vec::Vec};
use std::fmt;
use std::iter;
use std::string::String;
@ -44,8 +40,8 @@ pub type BlockIndex = u32;
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
/// A function in "VCode" (virtualized-register code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
@ -79,25 +75,10 @@ pub struct VCode<I: VCodeInst> {
/// Block successor lists, concatenated into one Vec. The `block_succ_range`
/// list of tuples above gives (start, end) ranges within this list that
/// correspond to each basic block's successors.
block_succs: Vec<BlockIndex>,
block_succs: Vec<BlockIx>,
/// Block indices by IR block.
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
/// IR block for each VCode Block. The length of this Vec will likely be
/// less than the total number of Blocks, because new Blocks (for edge
/// splits, for example) are appended during lowering.
bb_by_block: Vec<ir::Block>,
/// Order of block IDs in final generated code.
final_block_order: Vec<BlockIndex>,
/// Final block offsets. Computed during branch finalization and used
/// during emission.
final_block_offsets: Vec<CodeOffset>,
/// Size of code, accounting for block layout / alignment.
code_size: CodeOffset,
/// Block-order information.
block_order: BlockLoweringOrder,
/// ABI object.
abi: Box<dyn ABIBody<I = I>>,
@ -121,12 +102,8 @@ pub struct VCodeBuilder<I: VCodeInst> {
/// In-progress VCode.
vcode: VCode<I>,
/// Current basic block instructions, in reverse order (because blocks are
/// built bottom-to-top).
bb_insns: SmallVec<[(I, SourceLoc); 32]>,
/// Current IR-inst instructions, in forward order.
ir_inst_insns: SmallVec<[(I, SourceLoc); 4]>,
/// Index of the last block-start in the vcode.
block_start: InsnIndex,
/// Start of succs for the current block in the concatenated succs list.
succ_start: usize,
@ -137,12 +114,11 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
let vcode = VCode::new(abi);
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
let vcode = VCode::new(abi, block_order);
VCodeBuilder {
vcode,
bb_insns: SmallVec::new(),
ir_inst_insns: SmallVec::new(),
block_start: 0,
succ_start: 0,
cur_srcloc: SourceLoc::default(),
}
@ -153,6 +129,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
&mut *self.vcode.abi
}
/// Access to the BlockLoweringOrder object.
pub fn block_order(&self) -> &BlockLoweringOrder {
&self.vcode.block_order
}
/// Set the type of a VReg.
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
while self.vcode.vreg_types.len() <= vreg.get_index() {
@ -161,53 +142,17 @@ impl<I: VCodeInst> VCodeBuilder<I> {
self.vcode.vreg_types[vreg.get_index()] = ty;
}
/// Return the underlying bb-to-BlockIndex map.
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
&self.vcode.block_by_bb
}
/// Initialize the bb-to-BlockIndex map. Returns the first free
/// BlockIndex.
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
let mut bindex: BlockIndex = 0;
for bb in blocks.iter() {
self.vcode.block_by_bb[*bb] = bindex;
self.vcode.bb_by_block.push(*bb);
bindex += 1;
}
bindex
}
/// Get the BlockIndex for an IR block.
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
self.vcode.block_by_bb[bb]
}
/// Set the current block as the entry block.
pub fn set_entry(&mut self, block: BlockIndex) {
self.vcode.entry = block;
}
/// End the current IR instruction. Must be called after pushing any
/// instructions and prior to ending the basic block.
pub fn end_ir_inst(&mut self) {
while let Some(pair) = self.ir_inst_insns.pop() {
self.bb_insns.push(pair);
}
}
/// End the current basic block. Must be called after emitting vcode insts
/// for IR insts and prior to ending the function (building the VCode).
pub fn end_bb(&mut self) -> BlockIndex {
assert!(self.ir_inst_insns.is_empty());
let block_num = self.vcode.block_ranges.len() as BlockIndex;
// Push the instructions.
let start_idx = self.vcode.insts.len() as InsnIndex;
while let Some((i, loc)) = self.bb_insns.pop() {
self.vcode.insts.push(i);
self.vcode.srclocs.push(loc);
}
pub fn end_bb(&mut self) {
let start_idx = self.block_start;
let end_idx = self.vcode.insts.len() as InsnIndex;
self.block_start = end_idx;
// Add the instruction index range to the list of blocks.
self.vcode.block_ranges.push((start_idx, end_idx));
// End the successors list.
@ -216,8 +161,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
.block_succ_range
.push((self.succ_start, succ_end));
self.succ_start = succ_end;
block_num
}
/// Push an instruction for the current BB and current IR inst within the BB.
@ -225,19 +168,27 @@ impl<I: VCodeInst> VCodeBuilder<I> {
match insn.is_term() {
MachTerminator::None | MachTerminator::Ret => {}
MachTerminator::Uncond(target) => {
self.vcode.block_succs.push(target);
self.vcode.block_succs.push(BlockIx::new(target.get()));
}
MachTerminator::Cond(true_branch, false_branch) => {
self.vcode.block_succs.push(true_branch);
self.vcode.block_succs.push(false_branch);
self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
self.vcode
.block_succs
.push(BlockIx::new(false_branch.get()));
}
MachTerminator::Indirect(targets) => {
for target in targets {
self.vcode.block_succs.push(*target);
self.vcode.block_succs.push(BlockIx::new(target.get()));
}
}
}
self.ir_inst_insns.push((insn, self.cur_srcloc));
self.vcode.insts.push(insn);
self.vcode.srclocs.push(self.cur_srcloc);
}
/// Get the current source location.
pub fn get_srcloc(&self) -> SourceLoc {
self.cur_srcloc
}
/// Set the current source location.
@ -247,8 +198,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
/// Build the final VCode.
pub fn build(self) -> VCode<I> {
assert!(self.ir_inst_insns.is_empty());
assert!(self.bb_insns.is_empty());
self.vcode
}
}
@ -270,35 +219,9 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
}
}
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
let range = vcode.block_insns(BlockIx::new(block));
debug!(
"is_trivial_jump_block: block {} has len {}",
block,
range.len()
);
if range.len() != 1 {
return None;
}
let insn = range.first();
debug!(
" -> only insn is: {:?} with terminator {:?}",
vcode.get_insn(insn),
vcode.get_insn(insn).is_term()
);
match vcode.get_insn(insn).is_term() {
MachTerminator::Uncond(target) => Some(target),
_ => None,
}
}
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
VCode {
liveins: abi.liveins(),
liveouts: abi.liveouts(),
@ -309,11 +232,7 @@ impl<I: VCodeInst> VCode<I> {
block_ranges: vec![],
block_succ_range: vec![],
block_succs: vec![],
block_by_bb: SecondaryMap::with_default(0),
bb_by_block: vec![],
final_block_order: vec![],
final_block_offsets: vec![],
code_size: 0,
block_order,
abi,
}
}
@ -345,7 +264,7 @@ impl<I: VCodeInst> VCode<I> {
}
/// Get the successors for a block.
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
let (start, end) = self.block_succ_range[block as usize];
&self.block_succs[start..end]
}
@ -354,8 +273,6 @@ impl<I: VCodeInst> VCode<I> {
/// instructions including spliced fill/reload/move instructions, and replace
/// the VCode with them.
pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
self.final_block_order = compute_final_block_order(self);
// Record the spillslot count and clobbered registers for the ABI/stack
// setup code.
self.abi.set_num_spillslots(result.num_spill_slots as usize);
@ -370,11 +287,12 @@ impl<I: VCodeInst> VCode<I> {
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
let mut final_srclocs = vec![];
for block in &self.final_block_order {
let (start, end) = block_ranges[*block as usize];
for block in 0..self.num_blocks() {
let block = block as BlockIndex;
let (start, end) = block_ranges[block as usize];
let final_start = final_insns.len() as InsnIndex;
if *block == self.entry {
if block == self.entry {
// Start with the prologue.
let prologue = self.abi.gen_prologue();
let len = prologue.len();
@ -416,7 +334,7 @@ impl<I: VCodeInst> VCode<I> {
}
let final_end = final_insns.len() as InsnIndex;
final_block_ranges[*block as usize] = (final_start, final_end);
final_block_ranges[block as usize] = (final_start, final_end);
}
debug_assert!(final_insns.len() == final_srclocs.len());
@ -426,174 +344,68 @@ impl<I: VCodeInst> VCode<I> {
self.block_ranges = final_block_ranges;
}
/// Removes redundant branches, rewriting targets to point directly to the
/// ultimate block at the end of a chain of trivial one-target jumps.
pub fn remove_redundant_branches(&mut self) {
// For each block, compute the actual target block, looking through up to one
// block with single-target jumps (this will remove empty edge blocks inserted
// by phi-lowering).
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
.collect();
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
debug!(
"remove_redundant_branches: block_rewrites = {:?}",
block_rewrites
);
refcounts[self.entry as usize] = 1;
for block in 0..self.num_blocks() as u32 {
for insn in self.block_insns(BlockIx::new(block)) {
self.get_insn_mut(insn)
.with_block_rewrites(&block_rewrites[..]);
match self.get_insn(insn).is_term() {
MachTerminator::Uncond(bix) => {
refcounts[bix as usize] += 1;
}
MachTerminator::Cond(bix1, bix2) => {
refcounts[bix1 as usize] += 1;
refcounts[bix2 as usize] += 1;
}
MachTerminator::Indirect(blocks) => {
for block in blocks {
refcounts[*block as usize] += 1;
}
}
_ => {}
}
}
}
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
self.final_block_order = block_order
.into_iter()
.filter(|b| !deleted[*b as usize])
.collect();
// Rewrite successor information based on the block-rewrite map.
for succ in &mut self.block_succs {
let new_succ = block_rewrites[*succ as usize];
*succ = new_succ;
}
}
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
/// depending on fallthrough; and (ii) use concrete offsets.
pub fn finalize_branches(&mut self)
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
/// reloc/trap/etc. records ready for use.
pub fn emit(&self) -> MachBuffer<I>
where
I: MachInstEmit<MachSectionSize>,
I: MachInstEmit,
{
// Compute fallthrough block, indexed by block.
let num_final_blocks = self.final_block_order.len();
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
for i in 0..(num_final_blocks - 1) {
let from = self.final_block_order[i];
let to = self.final_block_order[i + 1];
block_fallthrough[from as usize] = Some(to);
}
let mut buffer = MachBuffer::new();
let mut state = Default::default();
// Pass over VCode instructions and finalize two-way branches into
// one-way branches with fallthrough.
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
let flags = self.abi.flags();
let mut cur_srcloc = None;
for block in 0..self.num_blocks() {
let next_block = block_fallthrough[block];
let (start, end) = self.block_ranges[block];
for iix in start..end {
let insn = &mut self.insts[iix as usize];
insn.with_fallthrough_block(next_block);
}
}
let flags = self.abi.flags();
// Compute block offsets.
let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()];
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[block as usize] = code_section.offset;
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize].emit(&mut code_section, flags);
}
}
// We now have the section layout.
self.final_block_offsets = block_offsets;
self.code_size = code_section.size();
// Update branches with known block offsets. This looks like the
// traversal above, but (i) does not update block_offsets, rather uses
// it (so forward references are now possible), and (ii) mutates the
// instructions.
let mut code_section = MachSectionSize::new(0);
for &block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[block as usize];
for iix in start..end {
self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
self.insts[iix as usize].emit(&mut code_section, flags);
}
}
}
/// Emit the instructions to a list of sections.
pub fn emit(&self) -> MachSections
where
I: MachInstEmit<MachSection>,
{
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx);
let flags = self.abi.flags();
let mut cur_srcloc = SourceLoc::default();
for &block in &self.final_block_order {
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
while new_offset > code_section.cur_offset_from_start() {
let block = block as BlockIndex;
let new_offset = I::align_basic_block(buffer.cur_offset());
while new_offset > buffer.cur_offset() {
// Pad with NOPs up to the aligned block offset.
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
nop.emit(code_section, flags);
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
nop.emit(&mut buffer, flags, &mut Default::default());
}
assert_eq!(code_section.cur_offset_from_start(), new_offset);
assert_eq!(buffer.cur_offset(), new_offset);
let (start, end) = self.block_ranges[block as usize];
buffer.bind_label(MachLabel::from_block(block));
for iix in start..end {
let srcloc = self.srclocs[iix as usize];
if srcloc != cur_srcloc {
if !cur_srcloc.is_default() {
code_section.end_srcloc();
if cur_srcloc != Some(srcloc) {
if cur_srcloc.is_some() {
buffer.end_srcloc();
}
if !srcloc.is_default() {
code_section.start_srcloc(srcloc);
}
cur_srcloc = srcloc;
buffer.start_srcloc(srcloc);
cur_srcloc = Some(srcloc);
}
self.insts[iix as usize].emit(code_section, flags);
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
}
if !cur_srcloc.is_default() {
code_section.end_srcloc();
cur_srcloc = SourceLoc::default();
if cur_srcloc.is_some() {
buffer.end_srcloc();
cur_srcloc = None;
}
// Do we need an island? Get the worst-case size of the next BB and see if, having
// emitted that many bytes, we will be beyond the deadline.
if block < (self.num_blocks() - 1) as BlockIndex {
let next_block = block + 1;
let next_block_range = self.block_ranges[next_block as usize];
let next_block_size = next_block_range.1 - next_block_range.0;
let worst_case_next_bb = I::worst_case_size() * next_block_size;
if buffer.island_needed(worst_case_next_bb) {
buffer.emit_island();
}
}
}
sections
buffer
}
/// Get the IR block for a BlockIndex, if one exists.
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
if (block as usize) < self.bb_by_block.len() {
Some(self.bb_by_block[block as usize])
} else {
None
}
self.block_order.lowered_order()[block as usize].orig_block()
}
}
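
The island decision at the end of `emit()` above is a worst-case bound: assume every instruction of the next block reaches `I::worst_case_size()`, and emit an island now if that many extra bytes could push a pending label fixup out of range. A hypothetical standalone version of that check (toy numbers, not the actual `MachBuffer` bookkeeping):

// Decide whether to emit a veneer island before lowering the next block.
// `nearest_deadline` is the furthest offset at which the closest pending
// fixup can still reach its label without a veneer.
fn island_needed(
    cur_offset: u32,
    next_block_insn_count: u32,
    worst_case_insn_size: u32,
    nearest_deadline: u32,
) -> bool {
    let worst_case_next_block = next_block_insn_count * worst_case_insn_size;
    cur_offset + worst_case_next_block > nearest_deadline
}

fn main() {
    // E.g. a conditional branch with roughly +/-1 MiB of reach, placed at offset 0.
    let deadline = 1 << 20;
    assert!(!island_needed(4_096, 100, 4, deadline)); // plenty of slack left
    assert!(island_needed(1_048_000, 200, 4, deadline)); // could overrun: emit island
}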
@ -629,13 +441,9 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
Range::new(InstIx::new(start), (end - start) as usize)
}
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
let (start, end) = self.block_succ_range[block.get() as usize];
self.block_succs[start..end]
.iter()
.cloned()
.map(BlockIx::new)
.collect()
Cow::Borrowed(&self.block_succs[start..end])
}
fn is_ret(&self, insn: InstIx) -> bool {
@ -649,7 +457,7 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
insn.get_regs(collector)
}
fn map_regs(insn: &mut I, mapper: &RegUsageMapper) {
fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
insn.map_regs(mapper);
}
@ -702,12 +510,11 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "VCode_Debug {{")?;
writeln!(f, " Entry block: {}", self.entry)?;
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
for block in 0..self.num_blocks() {
writeln!(f, "Block {}:", block,)?;
for succ in self.succs(block as BlockIndex) {
writeln!(f, " (successor: Block {})", succ)?;
writeln!(f, " (successor: Block {})", succ.get())?;
}
let (start, end) = self.block_ranges[block];
writeln!(f, " (instruction range: {} .. {})", start, end)?;
@ -726,52 +533,21 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use std::fmt::Write;
// Calculate an order in which to display the blocks. This is the same
// as final_block_order, but also includes blocks which are in the
// representation but not in final_block_order.
let mut display_order = Vec::<usize>::new();
// First display blocks in `final_block_order`
for bix in &self.final_block_order {
assert!((*bix as usize) < self.num_blocks());
display_order.push(*bix as usize);
}
// Now also take care of those not listed in `final_block_order`.
// This is quadratic, but it's also debug-only code.
for bix in 0..self.num_blocks() {
if display_order.contains(&bix) {
continue;
}
display_order.push(bix);
}
let mut s = String::new();
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
write!(
&mut s,
" Final block order: {:?}\n",
self.final_block_order
)
.unwrap();
for i in 0..self.num_blocks() {
let block = display_order[i];
let block = i as BlockIndex;
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
{
"** OMITTED **"
} else {
""
};
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
write!(&mut s, "Block {}:\n", block).unwrap();
if let Some(bb) = self.bindex_to_bb(block) {
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
}
for succ in self.succs(block as BlockIndex) {
write!(&mut s, " (successor: Block {})\n", succ).unwrap();
for succ in self.succs(block) {
write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
}
let (start, end) = self.block_ranges[block];
let (start, end) = self.block_ranges[block as usize];
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
for inst in start..end {
write!(

View file

@ -1,52 +0,0 @@
//! A pass that computes the number of uses of any given instruction.
use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst};
/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
/// to essentially do incremental DCE: when an instruction is no longer
/// needed because its computation has been isel'd into another machine
/// instruction at every use site, we can skip it.
#[derive(Clone, Debug)]
pub struct NumUses {
uses: SecondaryMap<Inst, u32>,
}
impl NumUses {
fn new() -> NumUses {
NumUses {
uses: SecondaryMap::with_default(0),
}
}
/// Compute the NumUses analysis result for a function.
pub fn compute(func: &Function) -> NumUses {
let mut uses = NumUses::new();
for bb in func.layout.blocks() {
for inst in func.layout.block_insts(bb) {
for arg in func.dfg.inst_args(inst) {
let v = func.dfg.resolve_aliases(*arg);
uses.add_value(&func.dfg, v);
}
}
}
uses
}
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
match dfg.value_def(v) {
ValueDef::Result(inst, _) => {
self.uses[inst] += 1;
}
_ => {}
}
}
/// Take the complete uses map, consuming this analysis result.
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
self.uses
}
}
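
The deleted `NumUses` pass above boils down to a per-definition reference count over instruction arguments. A toy equivalent over a flat IR, where `args[i]` lists the defining instructions consumed by instruction `i` (hypothetical shape, not the Cranelift `DataFlowGraph`):

// Count how many times each instruction's result is used as an argument.
fn count_uses(args: &[Vec<usize>]) -> Vec<u32> {
    let mut uses = vec![0u32; args.len()];
    for insn_args in args {
        for &def in insn_args {
            uses[def] += 1;
        }
    }
    uses
}

fn main() {
    // 0: iconst, 1: iconst, 2: iadd(v0, v1), 3: imul(v2, v0)
    let args = vec![vec![], vec![], vec![0, 1], vec![2, 0]];
    assert_eq!(count_uses(&args), vec![2, 1, 1, 0]);
    // A count of zero is what lets instruction selection skip a value whose
    // computation was folded into all of its users.
}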

887
third_party/rust/cranelift-codegen/src/peepmatic.rs vendored Normal file
View file

@ -0,0 +1,887 @@
//! Glue for working with `peepmatic`-generated peephole optimizers.
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
dfg::DataFlowGraph,
entities::{Inst, Value},
immediates::{Imm64, Uimm64},
instructions::{InstructionData, Opcode},
types, InstBuilder,
};
use crate::isa::TargetIsa;
use cranelift_codegen_shared::condcodes::IntCC;
use peepmatic_runtime::{
cc::ConditionCode,
instruction_set::InstructionSet,
operator::Operator,
part::{Constant, Part},
paths::Path,
r#type::{BitWidth, Kind, Type},
PeepholeOptimizations, PeepholeOptimizer,
};
use std::boxed::Box;
use std::convert::{TryFrom, TryInto};
use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};
/// Get the `preopt.peepmatic` peephole optimizer.
pub(crate) fn preopt<'a, 'b>(
isa: &'b dyn TargetIsa,
) -> PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa> {
static SERIALIZED: &[u8] = include_bytes!("preopt.serialized");
// Once initialized, this must never be re-assigned. The initialized value
// is semantically "static data" and is intentionally leaked for the whole
// program's lifetime.
static DESERIALIZED: AtomicPtr<PeepholeOptimizations> = AtomicPtr::new(ptr::null_mut());
// If `DESERIALIZED` has already been initialized, then just use it.
let ptr = DESERIALIZED.load(Ordering::SeqCst);
if let Some(peep_opts) = unsafe { ptr.as_ref() } {
return peep_opts.optimizer(isa);
}
// Otherwise, if `DESERIALIZED` hasn't been initialized, then we need to
// deserialize the peephole optimizations and initialize it. However,
// another thread could be doing the same thing concurrently, so there is a
// race to see who initializes `DESERIALIZED` first, and we need to be
// prepared to both win or lose that race.
let peep_opts = PeepholeOptimizations::deserialize(SERIALIZED)
.expect("should always be able to deserialize `preopt.serialized`");
let peep_opts = Box::into_raw(Box::new(peep_opts));
// Only update `DESERIALIZED` if it is still null, attempting to perform the
// one-time transition from null -> non-null.
if DESERIALIZED
.compare_and_swap(ptr::null_mut(), peep_opts, Ordering::SeqCst)
.is_null()
{
// We won the race to initialize `DESERIALIZED`.
debug_assert_eq!(DESERIALIZED.load(Ordering::SeqCst), peep_opts);
let peep_opts = unsafe { &*peep_opts };
return peep_opts.optimizer(isa);
}
// We lost the race to initialize `DESERIALIZED`. Drop our no-longer-needed
// instance of `peep_opts` and get the pointer to the instance that won the
// race.
let _ = unsafe { Box::from_raw(peep_opts) };
let peep_opts = DESERIALIZED.load(Ordering::SeqCst);
let peep_opts = unsafe { peep_opts.as_ref().unwrap() };
peep_opts.optimizer(isa)
}
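
The `DESERIALIZED` handling above is the usual race-to-initialize pattern for a leaked static: build a candidate, try to swap it in from null, and if another thread won, free the candidate and use the winner's value. A minimal, hypothetical sketch of the same pattern with a plain `String` payload (using `compare_exchange`, which newer Rust prefers over the deprecated `compare_and_swap`):

use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};

static CACHED: AtomicPtr<String> = AtomicPtr::new(ptr::null_mut());

// Return a lazily-initialized, intentionally leaked value.
fn get_cached() -> &'static String {
    let p = CACHED.load(Ordering::SeqCst);
    if let Some(v) = unsafe { p.as_ref() } {
        return v; // already initialized by us or by another thread
    }
    // Build a candidate; other threads may be doing the same concurrently.
    let candidate = Box::into_raw(Box::new(String::from("expensive-to-build value")));
    match CACHED.compare_exchange(ptr::null_mut(), candidate, Ordering::SeqCst, Ordering::SeqCst) {
        // We won the race: the candidate is now the shared, leaked value.
        Ok(_) => unsafe { &*candidate },
        // We lost: drop our candidate and use the winner's pointer.
        Err(winner) => {
            drop(unsafe { Box::from_raw(candidate) });
            unsafe { &*winner }
        }
    }
}

fn main() {
    assert!(std::ptr::eq(get_cached(), get_cached())); // same leaked instance
}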
/// Either a `Value` or an `Inst`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValueOrInst {
Value(Value),
Inst(Inst),
}
impl ValueOrInst {
/// Get the underlying `Value` if any.
pub fn value(&self) -> Option<Value> {
match *self {
Self::Value(v) => Some(v),
Self::Inst(_) => None,
}
}
/// Get the underlying `Inst` if any.
pub fn inst(&self) -> Option<Inst> {
match *self {
Self::Inst(i) => Some(i),
Self::Value(_) => None,
}
}
/// Unwrap the underlying `Value`, panicking if it is not a `Value`.
pub fn unwrap_value(&self) -> Value {
self.value().unwrap()
}
/// Unwrap the underlying `Inst`, panicking if it is not an `Inst`.
pub fn unwrap_inst(&self) -> Inst {
self.inst().unwrap()
}
/// Is this a `Value`?
pub fn is_value(&self) -> bool {
self.value().is_some()
}
/// Is this an `Inst`?
pub fn is_inst(&self) -> bool {
self.inst().is_some()
}
fn resolve_inst(&self, dfg: &DataFlowGraph) -> Option<Inst> {
match *self {
ValueOrInst::Inst(i) => Some(i),
ValueOrInst::Value(v) => dfg.value_def(v).inst(),
}
}
fn result_bit_width(&self, dfg: &DataFlowGraph) -> u8 {
match *self {
ValueOrInst::Value(v) => dfg.value_type(v).bits().try_into().unwrap(),
ValueOrInst::Inst(inst) => {
let result = dfg.first_result(inst);
dfg.value_type(result).bits().try_into().unwrap()
}
}
}
fn to_constant(&self, pos: &mut FuncCursor) -> Option<Constant> {
let inst = self.resolve_inst(&pos.func.dfg)?;
match pos.func.dfg[inst] {
InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} => {
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
let x: i64 = imm.into();
Some(Constant::Int(x as u64, width))
}
InstructionData::UnaryBool {
opcode: Opcode::Bconst,
imm,
} => {
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
Some(Constant::Bool(imm, width))
}
_ => None,
}
}
}
impl From<Value> for ValueOrInst {
fn from(v: Value) -> ValueOrInst {
ValueOrInst::Value(v)
}
}
impl From<Inst> for ValueOrInst {
fn from(i: Inst) -> ValueOrInst {
ValueOrInst::Inst(i)
}
}
/// Get the fixed bit width of `bit_width`, or if it is polymorphic, the bit
/// width of `root`.
fn bit_width(dfg: &DataFlowGraph, bit_width: BitWidth, root: Inst) -> u8 {
bit_width.fixed_width().unwrap_or_else(|| {
let tyvar = dfg.ctrl_typevar(root);
let ty = dfg.compute_result_type(root, 0, tyvar).unwrap();
u8::try_from(ty.bits()).unwrap()
})
}
/// Convert the constant `c` into an instruction.
fn const_to_value<'a>(builder: impl InstBuilder<'a>, c: Constant, root: Inst) -> Value {
match c {
Constant::Bool(b, width) => {
let width = bit_width(builder.data_flow_graph(), width, root);
let ty = match width {
1 => types::B1,
8 => types::B8,
16 => types::B16,
32 => types::B32,
64 => types::B64,
128 => types::B128,
_ => unreachable!(),
};
builder.bconst(ty, b)
}
Constant::Int(x, width) => {
let width = bit_width(builder.data_flow_graph(), width, root);
let ty = match width {
8 => types::I8,
16 => types::I16,
32 => types::I32,
64 => types::I64,
128 => types::I128,
_ => unreachable!(),
};
builder.iconst(ty, x as i64)
}
}
}
fn part_to_value(pos: &mut FuncCursor, root: Inst, part: Part<ValueOrInst>) -> Option<Value> {
match part {
Part::Instruction(ValueOrInst::Inst(inst)) => {
pos.func.dfg.inst_results(inst).first().copied()
}
Part::Instruction(ValueOrInst::Value(v)) => Some(v),
Part::Constant(c) => Some(const_to_value(pos.ins(), c, root)),
Part::ConditionCode(_) => None,
}
}
impl Opcode {
fn to_peepmatic_operator(&self) -> Option<Operator> {
macro_rules! convert {
( $( $op:ident $(,)* )* ) => {
match self {
$( Self::$op => Some(Operator::$op), )*
_ => None,
}
}
}
convert!(
AdjustSpDown,
AdjustSpDownImm,
Band,
BandImm,
Bconst,
Bint,
Bor,
BorImm,
Brnz,
Brz,
Bxor,
BxorImm,
Iadd,
IaddImm,
Icmp,
IcmpImm,
Iconst,
Ifcmp,
IfcmpImm,
Imul,
ImulImm,
Ireduce,
IrsubImm,
Ishl,
IshlImm,
Isub,
Rotl,
RotlImm,
Rotr,
RotrImm,
Sdiv,
SdivImm,
Select,
Sextend,
Srem,
SremImm,
Sshr,
SshrImm,
Trapnz,
Trapz,
Udiv,
UdivImm,
Uextend,
Urem,
UremImm,
Ushr,
UshrImm,
)
}
}
impl TryFrom<Constant> for Imm64 {
type Error = &'static str;
fn try_from(c: Constant) -> Result<Self, Self::Error> {
match c {
Constant::Int(x, _) => Ok(Imm64::from(x as i64)),
Constant::Bool(..) => Err("cannot create Imm64 from Constant::Bool"),
}
}
}
impl Into<Constant> for Imm64 {
#[inline]
fn into(self) -> Constant {
let x: i64 = self.into();
Constant::Int(x as _, BitWidth::SixtyFour)
}
}
impl Into<Part<ValueOrInst>> for Imm64 {
#[inline]
fn into(self) -> Part<ValueOrInst> {
let c: Constant = self.into();
c.into()
}
}
fn part_to_imm64(pos: &mut FuncCursor, part: Part<ValueOrInst>) -> Imm64 {
return match part {
Part::Instruction(x) => match x.to_constant(pos).unwrap_or_else(|| cannot_convert()) {
Constant::Int(x, _) => (x as i64).into(),
Constant::Bool(..) => cannot_convert(),
},
Part::Constant(Constant::Int(x, _)) => (x as i64).into(),
Part::ConditionCode(_) | Part::Constant(Constant::Bool(..)) => cannot_convert(),
};
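// Keeping the panic in a separate `#[cold]`, never-inlined helper keeps the
// common conversion path above compact; `unsupported` in `get_immediate`
// below follows the same pattern.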
#[inline(never)]
#[cold]
fn cannot_convert() -> ! {
panic!("cannot convert part into `Imm64`")
}
}
impl Into<Constant> for Uimm64 {
#[inline]
fn into(self) -> Constant {
let x: u64 = self.into();
Constant::Int(x, BitWidth::SixtyFour)
}
}
impl Into<Part<ValueOrInst>> for Uimm64 {
#[inline]
fn into(self) -> Part<ValueOrInst> {
let c: Constant = self.into();
c.into()
}
}
fn peepmatic_to_intcc(cc: ConditionCode) -> IntCC {
match cc {
ConditionCode::Eq => IntCC::Equal,
ConditionCode::Ne => IntCC::NotEqual,
ConditionCode::Slt => IntCC::SignedLessThan,
ConditionCode::Sle => IntCC::SignedGreaterThanOrEqual,
ConditionCode::Sgt => IntCC::SignedGreaterThan,
ConditionCode::Sge => IntCC::SignedLessThanOrEqual,
ConditionCode::Ult => IntCC::UnsignedLessThan,
ConditionCode::Uge => IntCC::UnsignedGreaterThanOrEqual,
ConditionCode::Ugt => IntCC::UnsignedGreaterThan,
ConditionCode::Ule => IntCC::UnsignedLessThanOrEqual,
ConditionCode::Of => IntCC::Overflow,
ConditionCode::Nof => IntCC::NotOverflow,
}
}
fn intcc_to_peepmatic(cc: IntCC) -> ConditionCode {
match cc {
IntCC::Equal => ConditionCode::Eq,
IntCC::NotEqual => ConditionCode::Ne,
IntCC::SignedLessThan => ConditionCode::Slt,
IntCC::SignedGreaterThanOrEqual => ConditionCode::Sle,
IntCC::SignedGreaterThan => ConditionCode::Sgt,
IntCC::SignedLessThanOrEqual => ConditionCode::Sge,
IntCC::UnsignedLessThan => ConditionCode::Ult,
IntCC::UnsignedGreaterThanOrEqual => ConditionCode::Uge,
IntCC::UnsignedGreaterThan => ConditionCode::Ugt,
IntCC::UnsignedLessThanOrEqual => ConditionCode::Ule,
IntCC::Overflow => ConditionCode::Of,
IntCC::NotOverflow => ConditionCode::Nof,
}
}
fn get_immediate(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Part<ValueOrInst> {
return match dfg[inst] {
InstructionData::BinaryImm64 { imm, .. } if i == 0 => imm.into(),
InstructionData::BranchIcmp { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::BranchInt { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::IntCompare { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::IntCompareImm { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::IntCompareImm { imm, .. } if i == 1 => imm.into(),
InstructionData::IntCond { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::IntCondTrap { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::IntSelect { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
InstructionData::UnaryBool { imm, .. } if i == 0 => {
Constant::Bool(imm, BitWidth::Polymorphic).into()
}
InstructionData::UnaryImm { imm, .. } if i == 0 => imm.into(),
ref otherwise => unsupported(otherwise),
};
#[inline(never)]
#[cold]
fn unsupported(data: &InstructionData) -> ! {
panic!("unsupported instruction data: {:?}", data)
}
}
fn get_argument(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Option<Value> {
dfg.inst_args(inst).get(i).copied()
}
fn peepmatic_ty_to_ir_ty(ty: Type, dfg: &DataFlowGraph, root: Inst) -> types::Type {
match (ty.kind, bit_width(dfg, ty.bit_width, root)) {
(Kind::Int, 8) => types::I8,
(Kind::Int, 16) => types::I16,
(Kind::Int, 32) => types::I32,
(Kind::Int, 64) => types::I64,
(Kind::Int, 128) => types::I128,
(Kind::Bool, 1) => types::B1,
(Kind::Bool, 8) => types::I8,
(Kind::Bool, 16) => types::I16,
(Kind::Bool, 32) => types::I32,
(Kind::Bool, 64) => types::I64,
(Kind::Bool, 128) => types::I128,
_ => unreachable!(),
}
}
// NB: the unsafe contract we must uphold here is that our implementation of
// `instruction_result_bit_width` must always return a valid, non-zero bit
// width.
unsafe impl<'a, 'b> InstructionSet<'b> for &'a dyn TargetIsa {
type Context = FuncCursor<'b>;
type Instruction = ValueOrInst;
fn replace_instruction(
&self,
pos: &mut FuncCursor<'b>,
old: ValueOrInst,
new: Part<ValueOrInst>,
) -> ValueOrInst {
log::trace!("replace {:?} with {:?}", old, new);
let old_inst = old.resolve_inst(&pos.func.dfg).unwrap();
// Try to convert `new` to an instruction, because we prefer replacing
// an old instruction with a new one wholesale. However, if the
// replacement cannot be converted to an instruction (e.g. the
// right-hand side is a block/function parameter value) then we change
// the old instruction's result to an alias of the new value.
let new_inst = match new {
Part::Instruction(ValueOrInst::Inst(inst)) => Some(inst),
Part::Instruction(ValueOrInst::Value(_)) => {
// Do not try to follow the value definition. If we transplant
// this value's instruction, and there are other uses of this
// value, then we could mess up ordering between instructions.
None
}
Part::Constant(c) => {
let v = const_to_value(pos.ins(), c, old_inst);
let inst = pos.func.dfg.value_def(v).unwrap_inst();
Some(inst)
}
Part::ConditionCode(_) => None,
};
match new_inst {
Some(new_inst) => {
pos.func.transplant_inst(old_inst, new_inst);
debug_assert_eq!(pos.current_inst(), Some(old_inst));
old_inst.into()
}
None => {
let new_value = part_to_value(pos, old_inst, new).unwrap();
let old_results = pos.func.dfg.detach_results(old_inst);
let old_results = old_results.as_slice(&pos.func.dfg.value_lists);
assert_eq!(old_results.len(), 1);
let old_value = old_results[0];
pos.func.dfg.change_to_alias(old_value, new_value);
pos.func.dfg.replace(old_inst).nop();
new_value.into()
}
}
}
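// For example (hypothetical value numbers): if an optimization rewrites
// `v3 = iadd_imm v2, 0` to just `v2`, there is no new instruction to
// transplant, so the `None` arm above makes `v3` an alias of `v2` and turns
// the old `iadd_imm` into a `nop`.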
fn get_part_at_path(
&self,
pos: &mut FuncCursor<'b>,
root: ValueOrInst,
path: Path,
) -> Option<Part<ValueOrInst>> {
// The root is path [0].
debug_assert!(!path.0.is_empty());
debug_assert_eq!(path.0[0], 0);
let mut part = Part::Instruction(root);
for p in path.0[1..].iter().copied() {
let inst = part.as_instruction()?.resolve_inst(&pos.func.dfg)?;
let operator = pos.func.dfg[inst].opcode().to_peepmatic_operator()?;
if p < operator.immediates_arity() {
part = get_immediate(&pos.func.dfg, inst, p as usize);
continue;
}
let arg = p - operator.immediates_arity();
let arg = arg as usize;
let value = get_argument(&pos.func.dfg, inst, arg)?;
part = Part::Instruction(value.into());
}
log::trace!("get_part_at_path({:?}) = {:?}", path, part);
Some(part)
}
fn operator(&self, pos: &mut FuncCursor<'b>, value_or_inst: ValueOrInst) -> Option<Operator> {
let inst = value_or_inst.resolve_inst(&pos.func.dfg)?;
pos.func.dfg[inst].opcode().to_peepmatic_operator()
}
fn make_inst_1(
&self,
pos: &mut FuncCursor<'b>,
root: ValueOrInst,
operator: Operator,
r#type: Type,
a: Part<ValueOrInst>,
) -> ValueOrInst {
log::trace!("make_inst_1: {:?}({:?})", operator, a);
let root = root.resolve_inst(&pos.func.dfg).unwrap();
match operator {
Operator::AdjustSpDown => {
let a = part_to_value(pos, root, a).unwrap();
pos.ins().adjust_sp_down(a).into()
}
Operator::AdjustSpDownImm => {
let c = a.unwrap_constant();
let imm = Imm64::try_from(c).unwrap();
pos.ins().adjust_sp_down_imm(imm).into()
}
Operator::Bconst => {
let c = a.unwrap_constant();
let val = const_to_value(pos.ins(), c, root);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Bint => {
let a = part_to_value(pos, root, a).unwrap();
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
let val = pos.ins().bint(ty, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Brnz => {
let a = part_to_value(pos, root, a).unwrap();
// NB: branching instructions must be the root of an
// optimization's right-hand side, so we get the destination
// block and arguments from the left-hand side's root. Peepmatic
// doesn't currently represent labels or varargs.
let block = pos.func.dfg[root].branch_destination().unwrap();
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
pos.ins().brnz(a, block, &args).into()
}
Operator::Brz => {
let a = part_to_value(pos, root, a).unwrap();
// See the comment in the `Operator::Brnz` match arm.
let block = pos.func.dfg[root].branch_destination().unwrap();
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
pos.ins().brz(a, block, &args).into()
}
Operator::Iconst => {
let a = a.unwrap_constant();
let val = const_to_value(pos.ins(), a, root);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Ireduce => {
let a = part_to_value(pos, root, a).unwrap();
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
let val = pos.ins().ireduce(ty, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Sextend => {
let a = part_to_value(pos, root, a).unwrap();
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
let val = pos.ins().sextend(ty, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Trapnz => {
let a = part_to_value(pos, root, a).unwrap();
// NB: similar to branching instructions (see comment in the
// `Operator::Brnz` match arm) trapping instructions must be the
// root of an optimization's right-hand side, and we get the
// trap code from the root of the left-hand side. Peepmatic
// doesn't currently represent trap codes.
let code = pos.func.dfg[root].trap_code().unwrap();
pos.ins().trapnz(a, code).into()
}
Operator::Trapz => {
let a = part_to_value(pos, root, a).unwrap();
// See comment in the `Operator::Trapnz` match arm.
let code = pos.func.dfg[root].trap_code().unwrap();
pos.ins().trapz(a, code).into()
}
Operator::Uextend => {
let a = part_to_value(pos, root, a).unwrap();
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
let val = pos.ins().uextend(ty, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
_ => unreachable!(),
}
}
fn make_inst_2(
&self,
pos: &mut FuncCursor<'b>,
root: ValueOrInst,
operator: Operator,
_: Type,
a: Part<ValueOrInst>,
b: Part<ValueOrInst>,
) -> ValueOrInst {
log::trace!("make_inst_2: {:?}({:?}, {:?})", operator, a, b);
let root = root.resolve_inst(&pos.func.dfg).unwrap();
match operator {
Operator::Band => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().band(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::BandImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().band_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Bor => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().bor(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::BorImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().bor_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Bxor => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().bxor(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::BxorImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().bxor_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Iadd => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().iadd(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::IaddImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().iadd_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Ifcmp => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ifcmp(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::IfcmpImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ifcmp_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Imul => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().imul(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::ImulImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().imul_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::IrsubImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().irsub_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Ishl => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ishl(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::IshlImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ishl_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Isub => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().isub(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Rotl => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().rotl(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::RotlImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().rotl_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Rotr => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().rotr(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::RotrImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().rotr_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Sdiv => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().sdiv(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::SdivImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().sdiv_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Srem => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().srem(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::SremImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().srem_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Sshr => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().sshr(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::SshrImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().sshr_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Udiv => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().udiv(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::UdivImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().udiv_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Urem => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().urem(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::UremImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().urem_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Ushr => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ushr(a, b);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::UshrImm => {
let a = part_to_imm64(pos, a);
let b = part_to_value(pos, root, b).unwrap();
let val = pos.ins().ushr_imm(b, a);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
_ => unreachable!(),
}
}
fn make_inst_3(
&self,
pos: &mut FuncCursor<'b>,
root: ValueOrInst,
operator: Operator,
_: Type,
a: Part<ValueOrInst>,
b: Part<ValueOrInst>,
c: Part<ValueOrInst>,
) -> ValueOrInst {
log::trace!("make_inst_3: {:?}({:?}, {:?}, {:?})", operator, a, b, c);
let root = root.resolve_inst(&pos.func.dfg).unwrap();
match operator {
Operator::Icmp => {
let cond = a.unwrap_condition_code();
let cond = peepmatic_to_intcc(cond);
let b = part_to_value(pos, root, b).unwrap();
let c = part_to_value(pos, root, c).unwrap();
let val = pos.ins().icmp(cond, b, c);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::IcmpImm => {
let cond = a.unwrap_condition_code();
let cond = peepmatic_to_intcc(cond);
let imm = part_to_imm64(pos, b);
let c = part_to_value(pos, root, c).unwrap();
let val = pos.ins().icmp_imm(cond, c, imm);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
Operator::Select => {
let a = part_to_value(pos, root, a).unwrap();
let b = part_to_value(pos, root, b).unwrap();
let c = part_to_value(pos, root, c).unwrap();
let val = pos.ins().select(a, b, c);
pos.func.dfg.value_def(val).unwrap_inst().into()
}
_ => unreachable!(),
}
}
fn instruction_to_constant(
&self,
pos: &mut FuncCursor<'b>,
value_or_inst: ValueOrInst,
) -> Option<Constant> {
value_or_inst.to_constant(pos)
}
fn instruction_result_bit_width(
&self,
pos: &mut FuncCursor<'b>,
value_or_inst: ValueOrInst,
) -> u8 {
value_or_inst.result_bit_width(&pos.func.dfg)
}
fn native_word_size_in_bits(&self, _pos: &mut FuncCursor<'b>) -> u8 {
self.pointer_bits()
}
}


@ -271,6 +271,42 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
.replace(inst)
.sload32_complex(info.flags, &args, info.offset);
}
Opcode::Uload8x8 => {
pos.func
.dfg
.replace(inst)
.uload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Sload8x8 => {
pos.func
.dfg
.replace(inst)
.sload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Uload16x4 => {
pos.func
.dfg
.replace(inst)
.uload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Sload16x4 => {
pos.func
.dfg
.replace(inst)
.sload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Uload32x2 => {
pos.func
.dfg
.replace(inst)
.uload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Sload32x2 => {
pos.func
.dfg
.replace(inst)
.sload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Store => {
pos.func.dfg.replace(inst).store_complex(
info.flags,
@ -305,7 +341,7 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
}
_ => panic!("Unsupported load or store opcode"),
},
InstructionData::BinaryImm {
InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg,
imm,

193
third_party/rust/cranelift-codegen/src/preopt.peepmatic vendored Normal file

@ -0,0 +1,193 @@
;; Apply basic simplifications.
;;
;; This folds constants with arithmetic to form `_imm` instructions, and other
;; minor simplifications.
;;
;; Doesn't apply some simplifications if the native word width (in bytes) is
;; smaller than the width of the instruction's controlling type. This would
;; result in an illegal instruction that would likely be expanded back into an
;; instruction on smaller types with the same initial opcode, creating
;; unnecessary churn.
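;; As an example of how to read these rules: the first rewrite below matches
;; an `iadd` whose second operand is a constant `$C`, checks the
;; `fits-in-native-word` precondition on `$C`, and replaces the whole
;; instruction with `iadd_imm $C $x`.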
;; Binary instructions whose second argument is constant.
(=> (when (iadd $x $C)
(fits-in-native-word $C))
(iadd_imm $C $x))
(=> (when (imul $x $C)
(fits-in-native-word $C))
(imul_imm $C $x))
(=> (when (sdiv $x $C)
(fits-in-native-word $C))
(sdiv_imm $C $x))
(=> (when (udiv $x $C)
(fits-in-native-word $C))
(udiv_imm $C $x))
(=> (when (srem $x $C)
(fits-in-native-word $C))
(srem_imm $C $x))
(=> (when (urem $x $C)
(fits-in-native-word $C))
(urem_imm $C $x))
(=> (when (band $x $C)
(fits-in-native-word $C))
(band_imm $C $x))
(=> (when (bor $x $C)
(fits-in-native-word $C))
(bor_imm $C $x))
(=> (when (bxor $x $C)
(fits-in-native-word $C))
(bxor_imm $C $x))
(=> (when (rotl $x $C)
(fits-in-native-word $C))
(rotl_imm $C $x))
(=> (when (rotr $x $C)
(fits-in-native-word $C))
(rotr_imm $C $x))
(=> (when (ishl $x $C)
(fits-in-native-word $C))
(ishl_imm $C $x))
(=> (when (ushr $x $C)
(fits-in-native-word $C))
(ushr_imm $C $x))
(=> (when (sshr $x $C)
(fits-in-native-word $C))
(sshr_imm $C $x))
(=> (when (isub $x $C)
(fits-in-native-word $C))
(iadd_imm $(neg $C) $x))
(=> (when (ifcmp $x $C)
(fits-in-native-word $C))
(ifcmp_imm $C $x))
(=> (when (icmp $cond $x $C)
(fits-in-native-word $C))
(icmp_imm $cond $C $x))
;; Binary instructions whose first operand is constant.
(=> (when (iadd $C $x)
(fits-in-native-word $C))
(iadd_imm $C $x))
(=> (when (imul $C $x)
(fits-in-native-word $C))
(imul_imm $C $x))
(=> (when (band $C $x)
(fits-in-native-word $C))
(band_imm $C $x))
(=> (when (bor $C $x)
(fits-in-native-word $C))
(bor_imm $C $x))
(=> (when (bxor $C $x)
(fits-in-native-word $C))
(bxor_imm $C $x))
(=> (when (isub $C $x)
(fits-in-native-word $C))
(irsub_imm $C $x))
;; Unary instructions whose operand is constant.
(=> (adjust_sp_down $C) (adjust_sp_down_imm $C))
;; Fold `(binop_imm $C1 (binop_imm $C2 $x))` into `(binop_imm $(binop $C2 $C1) $x)`.
(=> (iadd_imm $C1 (iadd_imm $C2 $x)) (iadd_imm $(iadd $C1 $C2) $x))
(=> (imul_imm $C1 (imul_imm $C2 $x)) (imul_imm $(imul $C1 $C2) $x))
(=> (bor_imm $C1 (bor_imm $C2 $x)) (bor_imm $(bor $C1 $C2) $x))
(=> (band_imm $C1 (band_imm $C2 $x)) (band_imm $(band $C1 $C2) $x))
(=> (bxor_imm $C1 (bxor_imm $C2 $x)) (bxor_imm $(bxor $C1 $C2) $x))
;; Remove operations that are no-ops.
(=> (iadd_imm 0 $x) $x)
(=> (imul_imm 1 $x) $x)
(=> (sdiv_imm 1 $x) $x)
(=> (udiv_imm 1 $x) $x)
(=> (bor_imm 0 $x) $x)
(=> (band_imm -1 $x) $x)
(=> (bxor_imm 0 $x) $x)
(=> (rotl_imm 0 $x) $x)
(=> (rotr_imm 0 $x) $x)
(=> (ishl_imm 0 $x) $x)
(=> (ushr_imm 0 $x) $x)
(=> (sshr_imm 0 $x) $x)
;; Replace with zero.
(=> (imul_imm 0 $x) 0)
(=> (band_imm 0 $x) 0)
;; Replace with negative 1.
(=> (bor_imm -1 $x) -1)
;; Transform `[(x << N) >> N]` into a (un)signed-extending move.
;;
;; i16 -> i8 -> i16
(=> (when (ushr_imm 8 (ishl_imm 8 $x))
(bit-width $x 16))
(uextend{i16} (ireduce{i8} $x)))
(=> (when (sshr_imm 8 (ishl_imm 8 $x))
(bit-width $x 16))
(sextend{i16} (ireduce{i8} $x)))
;; i32 -> i8 -> i32
(=> (when (ushr_imm 24 (ishl_imm 24 $x))
(bit-width $x 32))
(uextend{i32} (ireduce{i8} $x)))
(=> (when (sshr_imm 24 (ishl_imm 24 $x))
(bit-width $x 32))
(sextend{i32} (ireduce{i8} $x)))
;; i32 -> i16 -> i32
(=> (when (ushr_imm 16 (ishl_imm 16 $x))
(bit-width $x 32))
(uextend{i32} (ireduce{i16} $x)))
(=> (when (sshr_imm 16 (ishl_imm 16 $x))
(bit-width $x 32))
(sextend{i32} (ireduce{i16} $x)))
;; i64 -> i8 -> i64
(=> (when (ushr_imm 56 (ishl_imm 56 $x))
(bit-width $x 64))
(uextend{i64} (ireduce{i8} $x)))
(=> (when (sshr_imm 56 (ishl_imm 56 $x))
(bit-width $x 64))
(sextend{i64} (ireduce{i8} $x)))
;; i64 -> i16 -> i64
(=> (when (ushr_imm 48 (ishl_imm 48 $x))
(bit-width $x 64))
(uextend{i64} (ireduce{i16} $x)))
(=> (when (sshr_imm 48 (ishl_imm 48 $x))
(bit-width $x 64))
(sextend{i64} (ireduce{i16} $x)))
;; i64 -> i32 -> i64
(=> (when (ushr_imm 32 (ishl_imm 32 $x))
(bit-width $x 64))
(uextend{i64} (ireduce{i32} $x)))
(=> (when (sshr_imm 32 (ishl_imm 32 $x))
(bit-width $x 64))
(sextend{i64} (ireduce{i32} $x)))
;; Fold away redundant `bint` instructions that accept both integer and boolean
;; arguments.
(=> (select (bint $x) $y $z) (select $x $y $z))
(=> (brz (bint $x)) (brz $x))
(=> (brnz (bint $x)) (brnz $x))
(=> (trapz (bint $x)) (trapz $x))
(=> (trapnz (bint $x)) (trapnz $x))
;; Fold comparisons into branch operations when possible.
;;
;; This matches against operations which compare against zero, then use the
;; result in a `brz` or `brnz` branch. It folds those two operations into a
;; single `brz` or `brnz`.
(=> (brnz (icmp_imm ne 0 $x)) (brnz $x))
(=> (brz (icmp_imm ne 0 $x)) (brz $x))
(=> (brnz (icmp_imm eq 0 $x)) (brz $x))
(=> (brz (icmp_imm eq 0 $x)) (brnz $x))
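;; For instance, the last rule above applies because `icmp_imm eq 0 $x` is
;; zero exactly when `$x` is non-zero, so branching on that result being zero
;; is the same as `brnz $x`.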
;; Division and remainder by constants.
;;
;; TODO: this section is incomplete, and a bunch of related optimizations are
;; still hand-coded in `simple_preopt.rs`.
;; (Division by one is handled above.)
;; Remainder by one is zero.
(=> (urem_imm 1 $x) 0)
(=> (srem_imm 1 $x) 0)
;; Division by a power of two -> shift right.
(=> (when (udiv_imm $C $x)
(is-power-of-two $C))
(ushr_imm $(log2 $C) $x))
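;; For instance, a hypothetical `(udiv_imm 8 $x)` is rewritten by the rule
;; above to `(ushr_imm 3 $x)`, since 8 is a power of two and log2(8) = 3.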

Binary data
third_party/rust/cranelift-codegen/src/preopt.serialized vendored Normal file

Binary file not shown.

393
third_party/rust/cranelift-codegen/src/remove_constant_phis.rs vendored Normal file

@ -0,0 +1,393 @@
//! A Constant-Phi-Node removal pass.
use log::info;
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityList;
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
use crate::ir::{Block, Inst, Value};
use crate::timing;
use smallvec::{smallvec, SmallVec};
use std::vec::Vec;
// A note on notation. For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
//
// block4(v16: i32, v18: i32): <-- formal parameters
// ...
// brnz v27, block7(v22, v24) <-- actual parameters
// jump block6
// This transformation pass (conceptually) partitions all values in the
// function into two groups:
//
// * Group A: values defined by block formal parameters, except for the entry block.
//
// * Group B: All other values: that is, values defined by instructions,
// and the formals of the entry block.
//
// For each value in Group A, it attempts to establish whether it will have
// the value of exactly one member of Group B. If so, the formal parameter is
// deleted, all corresponding actual parameters (in jumps/branches to the
// defining block) are deleted, and a rename is inserted.
//
// The entry block is special-cased because (1) we don't know what values flow
// to its formals and (2) in any case we can't change its formals.
//
// Work proceeds in three phases.
//
// * Phase 1: examine all instructions. For each block, make up a useful
// grab-bag of information, `BlockSummary`, that summarises the block's
// formals and jump/branch instruction. This is used by Phases 2 and 3.
//
// * Phase 2: for each value in Group A, try to find a single Group B value
// that flows to it. This is done using a classical iterative forward
// dataflow analysis over a simple constant-propagation style lattice. It
// converges quickly in practice -- I have seen at most 4 iterations. This
// is relatively cheap because the iteration is done over the
// `BlockSummary`s, and does not visit each instruction. The resulting
// fixed point is stored in a `SolverState`.
//
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
// remove redundant formals and actuals, and to insert suitable renames.
//
// Note that the effectiveness of the analysis depends on the fact that
// there are no copy instructions in Cranelift's IR. If there were, the
// computation of `actual_absval` in Phase 2 would have to be extended to
// chase through such copies.
//
// For large functions, the analysis cost using the new AArch64 backend is about
// 0.6% of the non-optimising compile time, as measured by instruction counts.
// This transformation usually pays for itself several times over, though, by
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
// seen for large functions.
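// As a small worked illustration (the value numbers here are hypothetical):
// if every branch to block4 above passes the same value v10 for the formal
// v16, Phase 2 converges on One(v10) for v16; Phase 3 then deletes the formal
// v16, drops the corresponding actual from each branch, and makes v16 an
// alias of v10. If two different values can reach v18, its abstract value is
// Many and it is left untouched.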
// The `Value`s (Group B) that can flow to a formal parameter (Group A).
#[derive(Clone, Copy, Debug, PartialEq)]
enum AbstractValue {
// Two or more values flow to this formal.
Many,
// Exactly one value, as stated, flows to this formal. The `Value`s that
// can appear here are exactly: `Value`s defined by `Inst`s, plus the
// `Value`s defined by the formals of the entry block. Note that this is
// exactly the set of `Value`s that are *not* tracked in the solver below
// (see `SolverState`).
One(Value /*Group B*/),
// No value flows to this formal.
None,
}
impl AbstractValue {
fn join(self, other: AbstractValue) -> AbstractValue {
match (self, other) {
// Joining with `None` has no effect
(AbstractValue::None, p2) => p2,
(p1, AbstractValue::None) => p1,
// Joining with `Many` produces `Many`
(AbstractValue::Many, _p2) => AbstractValue::Many,
(_p1, AbstractValue::Many) => AbstractValue::Many,
// The only interesting case
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
if v1 == v2 {
AbstractValue::One(v1)
} else {
AbstractValue::Many
}
}
}
}
fn is_one(self) -> bool {
if let AbstractValue::One(_) = self {
true
} else {
false
}
}
}
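// Written out, `join` is the usual flat constant-propagation lattice:
//
//   join(None,   x)      = x
//   join(Many,   _)      = Many
//   join(One(a), One(a)) = One(a)
//   join(One(a), One(b)) = Many    when a != b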
// For some block, a useful bundle of info. The `Block` itself is not stored
// here since it will be the key in the associated `FxHashMap` -- see
// `summaries` below. For the `SmallVec` tuning params: most blocks have
// few parameters, hence `4`. And almost all blocks have either one or two
// successors, hence `2`.
#[derive(Debug)]
struct BlockSummary {
// Formal parameters for this `Block`
formals: SmallVec<[Value; 4] /*Group A*/>,
// For each `Inst` in this block that transfers to another block: the
// `Inst` itself, the destination `Block`, and the actual parameters
// passed. We don't bother to include transfers that pass zero parameters
// since that makes more work for the solver for no purpose.
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
}
impl BlockSummary {
fn new(formals: SmallVec<[Value; 4]>) -> Self {
Self {
formals,
dests: smallvec![],
}
}
}
// Solver state. This holds an AbstractValue for each formal parameter, except
// for those from the entry block.
struct SolverState {
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
}
impl SolverState {
fn new() -> Self {
Self {
absvals: FxHashMap::default(),
}
}
fn get(&self, actual: Value) -> AbstractValue {
match self.absvals.get(&actual) {
Some(lp) => *lp,
None => panic!("SolverState::get: formal param {:?} is untracked?!", actual),
}
}
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
self.absvals.get(&actual)
}
fn set(&mut self, actual: Value, lp: AbstractValue) {
match self.absvals.insert(actual, lp) {
Some(_old_lp) => {}
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
}
}
}
/// Detect phis in `func` that will only ever produce one value, using a
/// classic forward dataflow analysis. Then remove them.
#[inline(never)]
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::remove_constant_phis();
debug_assert!(domtree.is_valid());
// Get the blocks, in reverse postorder
let mut blocks_reverse_postorder = Vec::<Block>::new();
for block in domtree.cfg_postorder() {
blocks_reverse_postorder.push(*block);
}
blocks_reverse_postorder.reverse();
// Phase 1 of 3: for each block, make a summary containing all relevant
// info. The solver will iterate over the summaries, rather than having
// to inspect each instruction in each block.
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
for b in &blocks_reverse_postorder {
let formals = func.dfg.block_params(*b);
let mut summary = BlockSummary::new(SmallVec::from(formals));
for inst in func.layout.block_insts(*b) {
let idetails = &func.dfg[inst];
// Note that multi-dest transfers (i.e., branch tables) don't
// carry parameters in our IR, so we only have to care about
// `SingleDest` here.
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
{
let inst_var_args = func.dfg.inst_variable_args(inst);
// Skip branches/jumps that carry no params.
if inst_var_args.len() > 0 {
let mut actuals = SmallVec::<[Value; 4]>::new();
for arg in inst_var_args {
let arg = func.dfg.resolve_aliases(*arg);
actuals.push(arg);
}
summary.dests.push((inst, dest, actuals));
}
}
}
// Ensure the invariant that all blocks (except for the entry) appear
// in the summary, *unless* they have neither formals nor any
// param-carrying branches/jumps.
if formals.len() > 0 || summary.dests.len() > 0 {
summaries.insert(*b, summary);
}
}
// Phase 2 of 3: iterate over the summaries in reverse postorder,
// computing new `AbstractValue`s for each tracked `Value`. The set of
// tracked `Value`s is exactly Group A as described above.
let entry_block = func
.layout
.entry_block()
.expect("remove_constant_phis: entry block unknown");
// Set up initial solver state
let mut state = SolverState::new();
for b in &blocks_reverse_postorder {
// For each block, get the formals
if *b == entry_block {
continue;
}
let formals: &[Value] = func.dfg.block_params(*b);
for formal in formals {
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
assert!(mb_old_absval.is_none());
}
}
// Solve: repeatedly traverse the blocks in reverse postorder, until there
// are no changes.
let mut iter_no = 0;
loop {
iter_no += 1;
let mut changed = false;
for src in &blocks_reverse_postorder {
let mb_src_summary = summaries.get(src);
// The src block might have no summary. This means it has no
// branches/jumps that carry parameters *and* it doesn't take any
// parameters itself. Phase 1 ensures this. So we can ignore it.
if mb_src_summary.is_none() {
continue;
}
let src_summary = mb_src_summary.unwrap();
for (_inst, dst, src_actuals) in &src_summary.dests {
assert!(*dst != entry_block);
// By contrast, the dst block must have a summary. Phase 1
// will have only included an entry in `src_summary.dests` if
// that branch/jump carried at least one parameter. So the
// dst block does take parameters, so it must have a summary.
let dst_summary = summaries
.get(dst)
.expect("remove_constant_phis: dst block has no summary");
let dst_formals = &dst_summary.formals;
assert!(src_actuals.len() == dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
// Find the abstract value for `actual`. If it is a block
// formal parameter then the most recent abstract value is
// to be found in the solver state. If not, then it's a
// real value defining point (not a phi), in which case
// return it itself.
let actual_absval = match state.maybe_get(*actual) {
Some(pt) => *pt,
None => AbstractValue::One(*actual),
};
// And `join` the new value with the old.
let formal_absval_old = state.get(*formal);
let formal_absval_new = formal_absval_old.join(actual_absval);
if formal_absval_new != formal_absval_old {
changed = true;
state.set(*formal, formal_absval_new);
}
}
}
}
if !changed {
break;
}
}
let mut n_consts = 0;
for absval in state.absvals.values() {
if absval.is_one() {
n_consts += 1;
}
}
// Phase 3 of 3: edit the function to remove constant formals, using the
// summaries and the final solver state as a guide.
// Make up a set of blocks that need editing.
let mut need_editing = FxHashSet::<Block>::default();
for (block, summary) in &summaries {
if *block == entry_block {
continue;
}
for formal in &summary.formals {
let formal_absval = state.get(*formal);
if formal_absval.is_one() {
need_editing.insert(*block);
break;
}
}
}
// Firstly, deal with the formals. For each formal which is redundant,
// remove it, and also add a reroute from it to the constant value which
// we know it to be.
for b in &need_editing {
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
let formals: &[Value] = func.dfg.block_params(*b);
for formal in formals {
// The state must give an absval for `formal`.
if let AbstractValue::One(replacement_val) = state.get(*formal) {
del_these.push((*formal, replacement_val));
}
}
// We can delete the formals in any order. However,
// `remove_block_param` works by sliding backwards all arguments to
// the right of the one it is asked to delete. Hence when removing more
// than one formal, it is significantly more efficient to ask it to
// remove the rightmost formal first, and hence this `reverse`.
del_these.reverse();
for (redundant_formal, replacement_val) in del_these {
func.dfg.remove_block_param(redundant_formal);
func.dfg.change_to_alias(redundant_formal, replacement_val);
}
}
// Secondly, visit all branch insns. If the destination has had its
// formals changed, change the actuals accordingly. Don't scan all insns,
// rather just visit those as listed in the summaries we prepared earlier.
for (_src_block, summary) in &summaries {
for (inst, dst_block, _src_actuals) in &summary.dests {
if !need_editing.contains(dst_block) {
continue;
}
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
let num_fixed_actuals = func.dfg[*inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let dst_summary = summaries.get(&dst_block).unwrap();
// Check that the numbers of arguments make sense.
assert!(num_fixed_actuals <= num_old_actuals);
assert!(num_fixed_actuals + dst_summary.formals.len() == num_old_actuals);
// Create a new value list.
let mut new_actuals = EntityList::<Value>::new();
// Copy the fixed args to the new list
for i in 0..num_fixed_actuals {
let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
new_actuals.push(val, &mut func.dfg.value_lists);
}
// Copy the variable args (the actual block params) to the new
// list, filtering out redundant ones.
for i in 0..dst_summary.formals.len() {
let actual_i = old_actuals
.get(num_fixed_actuals + i, &func.dfg.value_lists)
.unwrap();
let formal_i = dst_summary.formals[i];
let is_redundant = state.get(formal_i).is_one();
if !is_redundant {
new_actuals.push(actual_i, &mut func.dfg.value_lists);
}
}
func.dfg[*inst].put_value_list(new_actuals);
}
}
info!(
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
iter_no,
state.absvals.len(),
n_consts
);
}


@ -10,10 +10,8 @@ use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{
condcodes::{CondCode, IntCC},
dfg::ValueDef,
immediates,
instructions::{Opcode, ValueList},
types::{I16, I32, I64, I8},
instructions::Opcode,
types::{I32, I64},
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
};
use crate::isa::TargetIsa;
@ -144,7 +142,7 @@ fn package_up_divrem_info(
/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
let (is_signed, is_rem) = match opcode {
Opcode::UdivImm => (false, false),
Opcode::UremImm => (false, true),
@ -468,340 +466,6 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
}
}
#[inline]
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} = dfg[candidate_inst]
{
return Some(imm);
}
}
None
}
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
/// Returns true if the final instruction has been converted to such a move.
fn try_fold_extended_move(
pos: &mut FuncCursor,
inst: Inst,
opcode: Opcode,
arg: Value,
imm: immediates::Imm64,
) -> bool {
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
if let InstructionData::BinaryImm {
opcode: Opcode::IshlImm,
arg: prev_arg,
imm: prev_imm,
} = &pos.func.dfg[arg_inst]
{
if imm != *prev_imm {
return false;
}
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
return false;
}
let imm_bits: i64 = imm.into();
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
8 => I8,
16 => I16,
32 => I32,
_ => return false,
};
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
// This becomes a no-op, since ireduce_ty has a smaller lane width than
// the argument type (also the destination type).
let arg = *prev_arg;
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
if opcode == Opcode::UshrImm {
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
} else {
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
}
return true;
}
}
false
}
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
/// simplifications.
///
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
/// width of the instruction's controlling type. This would result in an illegal instruction that
/// would likely be expanded back into an instruction on smaller types with the same initial
/// opcode, creating unnecessary churn.
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
match pos.func.dfg[inst] {
InstructionData::Binary { opcode, args } => {
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Sdiv => Opcode::SdivImm,
Opcode::Udiv => Opcode::UdivImm,
Opcode::Srem => Opcode::SremImm,
Opcode::Urem => Opcode::UremImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Rotl => Opcode::RotlImm,
Opcode::Rotr => Opcode::RotrImm,
Opcode::Ishl => Opcode::IshlImm,
Opcode::Ushr => Opcode::UshrImm,
Opcode::Sshr => Opcode::SshrImm,
Opcode::Isub => {
imm = imm.wrapping_neg();
Opcode::IaddImm
}
Opcode::Ifcmp => Opcode::IfcmpImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty.bytes() <= native_word_width {
pos.func
.dfg
.replace(inst)
.BinaryImm(new_opcode, ty, imm, args[0]);
// Repeat for BinaryImm simplification.
simplify(pos, inst, native_word_width);
}
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Isub => Opcode::IrsubImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty.bytes() <= native_word_width {
pos.func
.dfg
.replace(inst)
.BinaryImm(new_opcode, ty, imm, args[1]);
}
}
}
InstructionData::Unary { opcode, arg } => {
if let Opcode::AdjustSpDown = opcode {
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
// Note this works for both positive and negative immediate values.
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
}
}
}
InstructionData::BinaryImm { opcode, arg, imm } => {
let ty = pos.func.dfg.ctrl_typevar(inst);
let mut arg = arg;
let mut imm = imm;
match opcode {
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::BorImm
| Opcode::BandImm
| Opcode::BxorImm => {
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
if let InstructionData::BinaryImm {
opcode: prev_opcode,
arg: prev_arg,
imm: prev_imm,
} = &pos.func.dfg[arg_inst]
{
if opcode == *prev_opcode && ty == pos.func.dfg.ctrl_typevar(arg_inst) {
let lhs: i64 = imm.into();
let rhs: i64 = (*prev_imm).into();
let new_imm = match opcode {
Opcode::BorImm => lhs | rhs,
Opcode::BandImm => lhs & rhs,
Opcode::BxorImm => lhs ^ rhs,
Opcode::IaddImm => lhs.wrapping_add(rhs),
Opcode::ImulImm => lhs.wrapping_mul(rhs),
_ => panic!("can't happen"),
};
let new_imm = immediates::Imm64::from(new_imm);
let new_arg = *prev_arg;
pos.func
.dfg
.replace(inst)
.BinaryImm(opcode, ty, new_imm, new_arg);
imm = new_imm;
arg = new_arg;
}
}
}
}
Opcode::UshrImm | Opcode::SshrImm => {
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
{
return;
}
}
_ => {}
};
// Replace operations that are no-ops.
match (opcode, imm.into()) {
(Opcode::IaddImm, 0)
| (Opcode::ImulImm, 1)
| (Opcode::SdivImm, 1)
| (Opcode::UdivImm, 1)
| (Opcode::BorImm, 0)
| (Opcode::BandImm, -1)
| (Opcode::BxorImm, 0)
| (Opcode::RotlImm, 0)
| (Opcode::RotrImm, 0)
| (Opcode::IshlImm, 0)
| (Opcode::UshrImm, 0)
| (Opcode::SshrImm, 0) => {
// Alias the result value with the original argument.
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
}
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
// Replace by zero.
pos.func.dfg.replace(inst).iconst(ty, 0);
}
(Opcode::BorImm, -1) => {
// Replace by minus one.
pos.func.dfg.replace(inst).iconst(ty, -1);
}
_ => {}
}
}
InstructionData::IntCompare { opcode, cond, args } => {
debug_assert_eq!(opcode, Opcode::Icmp);
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
}
}
}
InstructionData::CondTrap { .. }
| InstructionData::Branch { .. }
| InstructionData::Ternary {
opcode: Opcode::Select,
..
} => {
// Fold away a redundant `bint`.
let condition_def = {
let args = pos.func.dfg.inst_args(inst);
pos.func.dfg.value_def(args[0])
};
if let ValueDef::Result(def_inst, _) = condition_def {
if let InstructionData::Unary {
opcode: Opcode::Bint,
arg: bool_val,
} = pos.func.dfg[def_inst]
{
let args = pos.func.dfg.inst_args_mut(inst);
args[0] = bool_val;
}
}
}
_ => {}
}
}
struct BranchOptInfo {
br_inst: Inst,
cmp_arg: Value,
args: ValueList,
new_opcode: Opcode,
}
/// Fold comparisons into branch operations when possible.
///
/// This matches against operations which compare against zero, then use the
/// result in a `brz` or `brnz` branch. It folds those two operations into a
/// single `brz` or `brnz`.
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
let mut info = if let InstructionData::Branch {
opcode: br_opcode,
args: ref br_args,
..
} = pos.func.dfg[inst]
{
let first_arg = {
let args = pos.func.dfg.inst_args(inst);
args[0]
};
let icmp_inst = if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
icmp_inst
} else {
return;
};
if let InstructionData::IntCompareImm {
opcode: Opcode::IcmpImm,
arg: cmp_arg,
cond: cmp_cond,
imm: cmp_imm,
} = pos.func.dfg[icmp_inst]
{
let cmp_imm: i64 = cmp_imm.into();
if cmp_imm != 0 {
return;
}
// icmp_imm returns non-zero when the comparison is true. So, if
// we're branching on zero, we need to invert the condition.
let cond = match br_opcode {
Opcode::Brz => cmp_cond.inverse(),
Opcode::Brnz => cmp_cond,
_ => return,
};
let new_opcode = match cond {
IntCC::Equal => Opcode::Brz,
IntCC::NotEqual => Opcode::Brnz,
_ => return,
};
BranchOptInfo {
br_inst: inst,
cmp_arg,
args: br_args.clone(),
new_opcode,
}
} else {
return;
}
} else {
return;
};
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
*opcode = info.new_opcode;
} else {
panic!();
}
}
enum BranchOrderKind {
BrzToBrnz(Value),
BrnzToBrz(Value),
@ -944,15 +608,490 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block,
cfg.recompute_block(pos.func, block);
}
#[cfg(feature = "enable-peepmatic")]
mod simplify {
use super::*;
use crate::peepmatic::ValueOrInst;
pub type PeepholeOptimizer<'a, 'b> =
peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;
pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
crate::peepmatic::preopt(isa)
}
pub fn apply_all<'a, 'b>(
optimizer: &mut PeepholeOptimizer<'a, 'b>,
pos: &mut FuncCursor<'a>,
inst: Inst,
_native_word_width: u32,
) {
// After we apply one optimization, that might make another
// optimization applicable. Keep running the peephole optimizer
// until either:
//
// * No optimization applied, and therefore it doesn't make sense to
// try again, because no optimization will apply again.
//
// * Or when we replaced an instruction with an alias to an existing
// value, because we already ran the peephole optimizer over the
// aliased value's instruction in an early part of the traversal
// over the function.
while let Some(ValueOrInst::Inst(new_inst)) =
optimizer.apply_one(pos, ValueOrInst::Inst(inst))
{
// We transplanted a new instruction into the current
// instruction, so the "new" instruction is actually the same
// one, just with different data.
debug_assert_eq!(new_inst, inst);
}
debug_assert_eq!(pos.current_inst(), Some(inst));
}
}
#[cfg(not(feature = "enable-peepmatic"))]
mod simplify {
use super::*;
use crate::ir::{
dfg::ValueDef,
immediates,
instructions::{Opcode, ValueList},
types::{B8, I16, I32, I8},
};
use std::marker::PhantomData;
pub struct PeepholeOptimizer<'a, 'b> {
phantom: PhantomData<(&'a (), &'b ())>,
}
pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
PeepholeOptimizer {
phantom: PhantomData,
}
}
pub fn apply_all<'a, 'b>(
_optimizer: &mut PeepholeOptimizer<'a, 'b>,
pos: &mut FuncCursor<'a>,
inst: Inst,
native_word_width: u32,
) {
simplify(pos, inst, native_word_width);
branch_opt(pos, inst);
}
#[inline]
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
if let InstructionData::UnaryImm {
opcode: Opcode::Iconst,
imm,
} = dfg[candidate_inst]
{
return Some(imm);
}
}
None
}
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
/// Returns true if the final instruction has been converted to such a move.
fn try_fold_extended_move(
pos: &mut FuncCursor,
inst: Inst,
opcode: Opcode,
arg: Value,
imm: immediates::Imm64,
) -> bool {
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
if let InstructionData::BinaryImm64 {
opcode: Opcode::IshlImm,
arg: prev_arg,
imm: prev_imm,
} = &pos.func.dfg[arg_inst]
{
if imm != *prev_imm {
return false;
}
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
return false;
}
let imm_bits: i64 = imm.into();
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
8 => I8,
16 => I16,
32 => I32,
_ => return false,
};
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
// This becomes a no-op, since ireduce_ty has a smaller lane width than
// the argument type (also the destination type).
let arg = *prev_arg;
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
if opcode == Opcode::UshrImm {
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
} else {
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
}
return true;
}
}
false
}
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
/// simplifications.
///
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
/// width of the instruction's controlling type. This would result in an illegal instruction that
/// would likely be expanded back into an instruction on smaller types with the same initial
/// opcode, creating unnecessary churn.
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
match pos.func.dfg[inst] {
InstructionData::Binary { opcode, args } => {
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Sdiv => Opcode::SdivImm,
Opcode::Udiv => Opcode::UdivImm,
Opcode::Srem => Opcode::SremImm,
Opcode::Urem => Opcode::UremImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Rotl => Opcode::RotlImm,
Opcode::Rotr => Opcode::RotrImm,
Opcode::Ishl => Opcode::IshlImm,
Opcode::Ushr => Opcode::UshrImm,
Opcode::Sshr => Opcode::SshrImm,
Opcode::Isub => {
imm = imm.wrapping_neg();
Opcode::IaddImm
}
Opcode::Ifcmp => Opcode::IfcmpImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty.bytes() <= native_word_width {
pos.func
.dfg
.replace(inst)
.BinaryImm64(new_opcode, ty, imm, args[0]);
// Repeat for BinaryImm64 simplification.
simplify(pos, inst, native_word_width);
}
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
let new_opcode = match opcode {
Opcode::Iadd => Opcode::IaddImm,
Opcode::Imul => Opcode::ImulImm,
Opcode::Band => Opcode::BandImm,
Opcode::Bor => Opcode::BorImm,
Opcode::Bxor => Opcode::BxorImm,
Opcode::Isub => Opcode::IrsubImm,
_ => return,
};
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty.bytes() <= native_word_width {
pos.func
.dfg
.replace(inst)
.BinaryImm64(new_opcode, ty, imm, args[1]);
}
}
}
InstructionData::Unary { opcode, arg } => {
if let Opcode::AdjustSpDown = opcode {
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
// Note this works for both positive and negative immediate values.
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
}
}
}
InstructionData::BinaryImm64 { opcode, arg, imm } => {
let ty = pos.func.dfg.ctrl_typevar(inst);
let mut arg = arg;
let mut imm = imm;
match opcode {
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::BorImm
| Opcode::BandImm
| Opcode::BxorImm => {
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
if let InstructionData::BinaryImm64 {
opcode: prev_opcode,
arg: prev_arg,
imm: prev_imm,
} = &pos.func.dfg[arg_inst]
{
if opcode == *prev_opcode
&& ty == pos.func.dfg.ctrl_typevar(arg_inst)
{
let lhs: i64 = imm.into();
let rhs: i64 = (*prev_imm).into();
let new_imm = match opcode {
Opcode::BorImm => lhs | rhs,
Opcode::BandImm => lhs & rhs,
Opcode::BxorImm => lhs ^ rhs,
Opcode::IaddImm => lhs.wrapping_add(rhs),
Opcode::ImulImm => lhs.wrapping_mul(rhs),
_ => panic!("can't happen"),
};
let new_imm = immediates::Imm64::from(new_imm);
let new_arg = *prev_arg;
pos.func
.dfg
.replace(inst)
.BinaryImm64(opcode, ty, new_imm, new_arg);
imm = new_imm;
arg = new_arg;
}
}
}
}
Opcode::UshrImm | Opcode::SshrImm => {
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
{
return;
}
}
_ => {}
};
// Replace operations that are no-ops.
match (opcode, imm.into()) {
(Opcode::IaddImm, 0)
| (Opcode::ImulImm, 1)
| (Opcode::SdivImm, 1)
| (Opcode::UdivImm, 1)
| (Opcode::BorImm, 0)
| (Opcode::BandImm, -1)
| (Opcode::BxorImm, 0)
| (Opcode::RotlImm, 0)
| (Opcode::RotrImm, 0)
| (Opcode::IshlImm, 0)
| (Opcode::UshrImm, 0)
| (Opcode::SshrImm, 0) => {
// Alias the result value with the original argument.
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
}
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
// Replace by zero.
pos.func.dfg.replace(inst).iconst(ty, 0);
}
(Opcode::BorImm, -1) => {
// Replace by minus one.
pos.func.dfg.replace(inst).iconst(ty, -1);
}
_ => {}
}
}
InstructionData::IntCompare { opcode, cond, args } => {
debug_assert_eq!(opcode, Opcode::Icmp);
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
}
}
}
InstructionData::CondTrap { .. }
| InstructionData::Branch { .. }
| InstructionData::Ternary {
opcode: Opcode::Select,
..
} => {
// Fold away a redundant `bint`.
let condition_def = {
let args = pos.func.dfg.inst_args(inst);
pos.func.dfg.value_def(args[0])
};
if let ValueDef::Result(def_inst, _) = condition_def {
if let InstructionData::Unary {
opcode: Opcode::Bint,
arg: bool_val,
} = pos.func.dfg[def_inst]
{
let args = pos.func.dfg.inst_args_mut(inst);
args[0] = bool_val;
}
}
}
InstructionData::Ternary {
opcode: Opcode::Bitselect,
args,
} => {
let old_cond_type = pos.func.dfg.value_type(args[0]);
if !old_cond_type.is_vector() {
return;
}
            // Replace bitselect with vselect if each lane of the controlling mask is
            // either all ones or all zeroes; on x86, bitselect is encoded with three
            // instructions, while vselect can be encoded with a single BLEND instruction.
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
InstructionData::Unary {
opcode: Opcode::RawBitcast,
arg,
} => {
                        // If the controlling mask is a raw-bitcasted boolean vector, then
                        // we know each lane is either all zeroes or all ones,
                        // so we can use the vselect instruction instead.
let arg_type = pos.func.dfg.value_type(arg);
if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
return;
}
(arg, arg_type)
}
InstructionData::UnaryConst {
opcode: Opcode::Vconst,
constant_handle,
} => {
                        // If each byte of the controlling mask is 0x00 or 0xFF, then we will
                        // always bitcast our way to vselect(B8x16, I8x16, I8x16).
                        // Bitselect operates at the bit level, so the lane types don't matter.
let const_data = pos.func.dfg.constants.get(constant_handle);
if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
return;
}
let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
}
_ => return,
};
let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
let old_arg_type = pos.func.dfg.value_type(args[1]);
if arg_type != old_arg_type {
                    // Operand types must match; we need to add bitcasts.
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
let ret = pos.ins().vselect(cond_val, arg1, arg2);
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
} else {
pos.func
.dfg
.replace(inst)
.vselect(cond_val, args[1], args[2]);
}
}
}
_ => {}
}
}
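Since this function packs several rewrites into one match, here is a minimal, self-contained sketch of the two `BinaryImm64` rules above, written against a hypothetical `Expr` mini-IR rather than Cranelift's real types: nested immediate operations are merged (binary_op(C2, binary_op(C1, x)) becomes binary_op(C1 op C2, x)), and identity operations such as adding 0 or AND-ing with all ones collapse to the original value.

// Hypothetical mini-IR, used only to illustrate the folds performed by the
// `BinaryImm64` arm of `simplify` above; this is not Cranelift's real IR.
#[derive(Debug, PartialEq)]
enum Expr {
    Value(u32),              // an opaque SSA value
    IaddImm(Box<Expr>, i64), // x + C (wrapping)
    BandImm(Box<Expr>, i64), // x & C
}

fn fold(e: Expr) -> Expr {
    match e {
        // binary_op(C2, binary_op(C1, x)) => binary_op(C1 op C2, x)
        Expr::IaddImm(inner, c2) => match fold(*inner) {
            Expr::IaddImm(x, c1) => fold(Expr::IaddImm(x, c1.wrapping_add(c2))),
            // iadd_imm(x, 0) is a no-op: alias the result to x.
            x if c2 == 0 => x,
            x => Expr::IaddImm(Box::new(x), c2),
        },
        Expr::BandImm(inner, c2) => match fold(*inner) {
            Expr::BandImm(x, c1) => fold(Expr::BandImm(x, c1 & c2)),
            // band_imm(x, -1) is a no-op.
            x if c2 == -1 => x,
            x => Expr::BandImm(Box::new(x), c2),
        },
        other => other,
    }
}

fn main() {
    let nested = Expr::IaddImm(Box::new(Expr::IaddImm(Box::new(Expr::Value(0)), 3)), 4);
    assert_eq!(fold(nested), Expr::IaddImm(Box::new(Expr::Value(0)), 7));

    let noop = Expr::BandImm(Box::new(Expr::Value(1)), -1);
    assert_eq!(fold(noop), Expr::Value(1));
}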
struct BranchOptInfo {
br_inst: Inst,
cmp_arg: Value,
args: ValueList,
new_opcode: Opcode,
}
/// Fold comparisons into branch operations when possible.
///
/// This matches against operations which compare against zero, then use the
/// result in a `brz` or `brnz` branch. It folds those two operations into a
/// single `brz` or `brnz`.
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
let mut info = if let InstructionData::Branch {
opcode: br_opcode,
args: ref br_args,
..
} = pos.func.dfg[inst]
{
let first_arg = {
let args = pos.func.dfg.inst_args(inst);
args[0]
};
let icmp_inst =
if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
icmp_inst
} else {
return;
};
if let InstructionData::IntCompareImm {
opcode: Opcode::IcmpImm,
arg: cmp_arg,
cond: cmp_cond,
imm: cmp_imm,
} = pos.func.dfg[icmp_inst]
{
let cmp_imm: i64 = cmp_imm.into();
if cmp_imm != 0 {
return;
}
// icmp_imm returns non-zero when the comparison is true. So, if
// we're branching on zero, we need to invert the condition.
let cond = match br_opcode {
Opcode::Brz => cmp_cond.inverse(),
Opcode::Brnz => cmp_cond,
_ => return,
};
let new_opcode = match cond {
IntCC::Equal => Opcode::Brz,
IntCC::NotEqual => Opcode::Brnz,
_ => return,
};
BranchOptInfo {
br_inst: inst,
cmp_arg,
args: br_args.clone(),
new_opcode,
}
} else {
return;
}
} else {
return;
};
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
*opcode = info.new_opcode;
} else {
panic!();
}
}
}
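A rough, self-contained model of the rewrite this function performs, using hypothetical `Branch`/`Cond` enums rather than Cranelift's instruction data: a compare-against-zero feeding `brz`/`brnz` is folded into a single branch on the compared value, inverting the condition when the branch fires on zero.

// Hypothetical, simplified model of the brz/brnz-over-icmp_imm fold done by
// `branch_opt`; these enums are illustrative, not Cranelift's InstructionData.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Cond { Eq, Ne }

#[derive(Clone, Copy, Debug, PartialEq)]
enum Branch {
    Brz(&'static str),  // branch taken when the named value is zero
    Brnz(&'static str), // branch taken when the named value is non-zero
}

// `cmp` is the condition and operand of an `icmp_imm cond x, 0` feeding the branch.
fn fold_branch(br: Branch, cmp: (Cond, &'static str)) -> Branch {
    let (cond, x) = cmp;
    // icmp_imm is non-zero when the comparison holds, so branching on zero
    // means branching when the comparison is false: invert the condition.
    let effective = match br {
        Branch::Brz(_) => match cond { Cond::Eq => Cond::Ne, Cond::Ne => Cond::Eq },
        Branch::Brnz(_) => cond,
    };
    match effective {
        Cond::Eq => Branch::Brz(x),  // take the branch iff x == 0
        Cond::Ne => Branch::Brnz(x), // take the branch iff x != 0
    }
}

fn main() {
    // v = icmp_imm eq x, 0; brnz v, block  ==>  brz x, block
    assert_eq!(fold_branch(Branch::Brnz("v"), (Cond::Eq, "x")), Branch::Brz("x"));
    // v = icmp_imm ne x, 0; brz v, block   ==>  brz x, block
    assert_eq!(fold_branch(Branch::Brz("v"), (Cond::Ne, "x")), Branch::Brz("x"));
}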
/// The main pre-opt pass.
pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
let _tt = timing::preopt();
let mut pos = FuncCursor::new(func);
let native_word_width = isa.pointer_bytes();
let native_word_width = isa.pointer_bytes() as u32;
let mut optimizer = simplify::peephole_optimizer(isa);
while let Some(block) = pos.next_block() {
while let Some(inst) = pos.next_inst() {
// Apply basic simplifications.
simplify(&mut pos, inst, native_word_width as u32);
simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
// Try to transform divide-by-constant into simpler operations.
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
@ -960,7 +1099,6 @@ pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn Targ
continue;
}
branch_opt(&mut pos, inst);
branch_order(&mut pos, cfg, block, inst);
}
}
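The `get_div_info` step above (its transformation body is elided from this hunk) feeds a divide-by-constant rewrite. A hedged sketch of the simplest case only, unsigned division and remainder by a power of two; the real pass also handles general constants via multiply-by-magic-number sequences.

// Illustrative only: the power-of-two special case of divide-by-constant
// strength reduction; the actual pass also handles non-power-of-two divisors.
fn udiv_by_const(x: u32, d: u32) -> u32 {
    assert!(d != 0);
    if d.is_power_of_two() {
        x >> d.trailing_zeros() // x / 2^k == x >> k
    } else {
        x / d // the real transformation uses a multiply-high by a "magic" constant
    }
}

fn urem_by_const(x: u32, d: u32) -> u32 {
    assert!(d != 0);
    if d.is_power_of_two() {
        x & (d - 1) // x % 2^k == x & (2^k - 1)
    } else {
        x % d
    }
}

fn main() {
    for &x in &[0u32, 1, 7, 8, 9, 1234, u32::MAX] {
        assert_eq!(udiv_by_const(x, 8), x / 8);
        assert_eq!(urem_by_const(x, 8), x % 8);
    }
}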


@ -62,6 +62,7 @@ define_passes! {
gvn: "Global value numbering",
licm: "Loop invariant code motion",
unreachable_code: "Remove unreachable blocks",
remove_constant_phis: "Remove constant phi-nodes",
regalloc: "Register allocation",
ra_liveness: "RA liveness analysis",


@ -18,9 +18,9 @@ use serde::{Deserialize, Serialize};
pub struct ValueLocRange {
/// The ValueLoc containing a ValueLabel during this range.
pub loc: ValueLoc,
/// The start of the range.
/// The start of the range. It is an offset in the generated code.
pub start: u32,
/// The end of the range.
/// The end of the range. It is an offset in the generated code.
pub end: u32,
}
@ -91,6 +91,11 @@ pub fn build_value_labels_ranges<T>(
where
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
{
// FIXME(#1523): New-style backend does not yet have debug info.
if isa.get_mach_backend().is_some() {
return HashMap::new();
}
let values_labels = build_value_labels_index::<T>(func);
let mut blocks = func.layout.blocks().collect::<Vec<_>>();


@ -756,10 +756,10 @@ impl<'a> Verifier<'a> {
| UnaryIeee64 { .. }
| UnaryBool { .. }
| Binary { .. }
| BinaryImm { .. }
| BinaryImm8 { .. }
| BinaryImm64 { .. }
| Ternary { .. }
| InsertLane { .. }
| ExtractLane { .. }
| TernaryImm8 { .. }
| Shuffle { .. }
| IntCompare { .. }
| IntCompareImm { .. }
@ -1912,20 +1912,20 @@ impl<'a> Verifier<'a> {
Ok(())
}
}
ir::InstructionData::ExtractLane {
ir::InstructionData::BinaryImm8 {
opcode: ir::instructions::Opcode::Extractlane,
lane,
imm: lane,
arg,
..
}
| ir::InstructionData::InsertLane {
| ir::InstructionData::TernaryImm8 {
opcode: ir::instructions::Opcode::Insertlane,
lane,
imm: lane,
args: [arg, _],
..
} => {
// We must be specific about the opcodes above because other instructions are using
// the ExtractLane/InsertLane formats.
// the same formats.
let ty = self.func.dfg.value_type(arg);
if u16::from(lane) >= ty.lane_count() {
errors.fatal((


@ -508,7 +508,8 @@ pub fn write_operands(
constant_handle, ..
} => write!(w, " {}", constant_handle),
Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]),
BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
BinaryImm8 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
BinaryImm64 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]),
MultiAry { ref args, .. } => {
if args.is_empty() {
@ -518,8 +519,7 @@ pub fn write_operands(
}
}
NullAry { .. } => write!(w, " "),
InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm),
Shuffle { mask, args, .. } => {
let data = dfg.immediates.get(mask).expect(
"Expected the shuffle mask to already be inserted into the immediates table",


@ -1 +1 @@
{"files":{"Cargo.toml":"cd1dd7e4040349ff8e5e88cbc3273c2b52cb411853933de6aea8976a1a99445f","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"f6d738a46f1dca8b0c82a5910d86cd572a3585ab7ef9f73dac96962529069190","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"dccb3dd6fc87eba0101de56417f21cab67a4394831df9fa41e3bbddb70cdf694","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"c4ee5d42f3f76a1458ec0d97b5777569906819fe5b4002512de0e69814754c53","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}


@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.63.0"
version = "0.64.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"


@ -85,6 +85,10 @@ macro_rules! entity_impl {
fn reserved_value() -> $entity {
$entity($crate::__core::u32::MAX)
}
fn is_reserved_value(&self) -> bool {
self.0 == $crate::__core::u32::MAX
}
}
impl $entity {


@ -11,9 +11,11 @@ use core::fmt;
use core::mem;
/// Types that have a reserved value which can't be created any other way.
pub trait ReservedValue: Eq {
pub trait ReservedValue {
/// Create an instance of the reserved value.
fn reserved_value() -> Self;
    /// Checks whether this value is the reserved one.
fn is_reserved_value(&self) -> bool;
}
/// Packed representation of `Option<T>`.
@ -23,12 +25,12 @@ pub struct PackedOption<T: ReservedValue>(T);
impl<T: ReservedValue> PackedOption<T> {
/// Returns `true` if the packed option is a `None` value.
pub fn is_none(&self) -> bool {
self.0 == T::reserved_value()
self.0.is_reserved_value()
}
/// Returns `true` if the packed option is a `Some` value.
pub fn is_some(&self) -> bool {
self.0 != T::reserved_value()
!self.0.is_reserved_value()
}
/// Expand the packed option into a normal `Option`.
@ -75,7 +77,7 @@ impl<T: ReservedValue> From<T> for PackedOption<T> {
/// Convert `t` into a packed `Some(x)`.
fn from(t: T) -> Self {
debug_assert!(
t != T::reserved_value(),
!t.is_reserved_value(),
"Can't make a PackedOption from the reserved value."
);
Self(t)
@ -123,6 +125,10 @@ mod tests {
fn reserved_value() -> Self {
NoC(13)
}
fn is_reserved_value(&self) -> bool {
self.0 == 13
}
}
#[test]
@ -145,6 +151,10 @@ mod tests {
fn reserved_value() -> Self {
Ent(13)
}
fn is_reserved_value(&self) -> bool {
self.0 == 13
}
}
#[test]
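To see why `ReservedValue` grows an `is_reserved_value` method (and drops the `Eq` bound), here is a small self-contained sketch, independent of the real cranelift-entity crate: the packed option can now ask the value itself whether it is in the reserved state instead of comparing against `T::reserved_value()` with `==`.

// Simplified stand-ins for cranelift-entity's ReservedValue / PackedOption,
// written here only to illustrate the new `is_reserved_value` hook.
trait ReservedValue {
    fn reserved_value() -> Self;
    fn is_reserved_value(&self) -> bool;
}

struct PackedOption<T: ReservedValue>(T);

impl<T: ReservedValue> PackedOption<T> {
    fn none() -> Self {
        Self(T::reserved_value())
    }
    fn some(t: T) -> Self {
        debug_assert!(!t.is_reserved_value(), "reserved value used as Some");
        Self(t)
    }
    fn is_none(&self) -> bool {
        // No `Eq` needed: the type itself knows its reserved state.
        self.0.is_reserved_value()
    }
    fn expand(self) -> Option<T> {
        if self.0.is_reserved_value() { None } else { Some(self.0) }
    }
}

// An entity index that reserves u32::MAX, mirroring what `entity_impl!` generates.
#[derive(Debug, PartialEq)]
struct Block(u32);

impl ReservedValue for Block {
    fn reserved_value() -> Self { Block(u32::MAX) }
    fn is_reserved_value(&self) -> bool { self.0 == u32::MAX }
}

fn main() {
    let a: PackedOption<Block> = PackedOption::some(Block(7));
    let b: PackedOption<Block> = PackedOption::none();
    assert!(!a.is_none());
    assert!(b.is_none());
    assert_eq!(a.expand(), Some(Block(7)));
}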


@ -1 +1 @@
{"files":{"Cargo.toml":"d152c6553c0091b43d9ea0cd547dc49440e6321eb792bf47fdd3245aed046513","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"f750cc995c66635dab7f2b977266cf9235d984b585ab8145bdb858ea8e1b0fb4","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"084cc46ba2d09a2ee8085c37be8624b3cc249d381f1cbee6df468930ce15e415","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"d1d8477572f70cc28f71424af272d9eec0adf58af657ff153c4acbbb39822a50","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}


@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-frontend"
version = "0.63.0"
version = "0.64.0"
description = "Cranelift IR builder helper"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-frontend"
@ -11,7 +11,7 @@ readme = "README.md"
edition = "2018"
[dependencies]
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
hashbrown = { version = "0.7", optional = true }


@ -272,6 +272,12 @@ impl<'a> FunctionBuilder<'a> {
/// In order to use a variable in a `use_var`, you need to declare its type with this method.
pub fn declare_var(&mut self, var: Variable, ty: Type) {
debug_assert_eq!(
self.func_ctx.types[var],
types::INVALID,
"variable {:?} is declared twice",
var
);
self.func_ctx.types[var] = ty;
}
@ -285,6 +291,12 @@ impl<'a> FunctionBuilder<'a> {
var
)
});
debug_assert_ne!(
ty,
types::INVALID,
"variable {:?} is used but its type has not been declared",
var
);
self.func_ctx
.ssa
.use_var(self.func, var, ty, self.position.unwrap())
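The two new debug assertions enforce a declare-before-use contract on variables. The following toy model (not the real cranelift-frontend types; `VarTable` and `Ty` are made up for illustration) mirrors how an INVALID type entry is used as the "not yet declared" sentinel.

// A toy model of the declare-before-use contract the new debug assertions enforce.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Ty { Invalid, I32 }

struct VarTable {
    types: Vec<Ty>, // indexed by variable number; Ty::Invalid means "not declared yet"
}

impl VarTable {
    fn new(capacity: usize) -> Self {
        Self { types: vec![Ty::Invalid; capacity] }
    }
    fn declare_var(&mut self, var: usize, ty: Ty) {
        // Mirrors the new assertion in declare_var: declaring twice is a bug.
        debug_assert_eq!(self.types[var], Ty::Invalid, "variable {} declared twice", var);
        self.types[var] = ty;
    }
    fn use_var(&self, var: usize) -> Ty {
        let ty = self.types[var];
        // Mirrors the new assertion in use_var: using before declaring is a bug.
        debug_assert_ne!(ty, Ty::Invalid, "variable {} used before declaration", var);
        ty
    }
}

fn main() {
    let mut vars = VarTable::new(4);
    vars.declare_var(0, Ty::I32);
    assert_eq!(vars.use_var(0), Ty::I32);
    // In a debug build, either of the following would now panic:
    // vars.declare_var(0, Ty::I32); // declared twice
    // vars.use_var(1);              // used before declaration
}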


@ -1 +1 @@
{"files":{"Cargo.toml":"107a12d0bc1ee99c8ffd9cf746c4d06040a90bd5769fc29d36a88371d09a67b2","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"cce724251d4abc08c6492e1e25c138ab5a0d11e9ac90bc573652b18e034f56ed","src/code_translator.rs":"4b70704fd50b24cd695d0a469d92f06d4a4fc328f24247a6c7a1ba39ac301ee0","src/environ/dummy.rs":"49bce7a8eb9f21a61c12db537b51ab6bdb3d0e1eb6253084268256d96cae68a5","src/environ/mod.rs":"b6f33f619090ff497b4e22150d77a290f259716374ac2e377b73c47cd1dafe85","src/environ/spec.rs":"3a1543f99bff340c7f6bbe3f7cb8e8ec829e4139957f3c578d5b03e29df50f9e","src/func_translator.rs":"a165063eafedbb8e6b632996f747eeb49a3d6f8a70cab6d741abfc4fd9af892d","src/lib.rs":"05b9994c062faf2065046d1e4d7caffb26823816f367d77ede6918e24fcfa6b0","src/module_translator.rs":"bcdf5a84226b726a73f4be0acb0318ca89c82584460101378e73021d85bd4485","src/sections_translator.rs":"8c4c24308332c63d16fcf19693a7ecff2239e73b4752b0d3830b273fabcee9f1","src/state/func_state.rs":"b114522784984a7cc26a3549c7c17f842885e1232254de81d938f9d155f95aa6","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"2f299b043deb806b48583fe54bbb46708f7d8a1454b7be0eb285568064e5a7f9","src/translation_utils.rs":"a1723cf6c216edd8aa845c61b80907167569f0c830344e0f2dc86a7232d45c5c","tests/wasm_testsuite.rs":"730304f139371e5ef3fd913ec271fc4db181869b447c6ed26c54313b5c31495c"},"package":null}
{"files":{"Cargo.toml":"3dd16e5f91cb20bc9afaff9880e1035d1c33c68851f593e6f2c5a0c92e292133","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"cce724251d4abc08c6492e1e25c138ab5a0d11e9ac90bc573652b18e034f56ed","src/code_translator.rs":"f9befe6f5a53eede1e9937abe0bced442f8c0276996bfb4d77c27e81d4746b4f","src/environ/dummy.rs":"07b6510a7141b92769c914e37386790486f92b691beb0876b8590f2ae5489ee4","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"2ff8524cd592efdef67e5f8d06d144f7d628dee8183848ff4f5e35850f3ce550","src/func_translator.rs":"eb1fcea970407eda872984808e9a3e3a3297c2dea6e3a600ee7116ca89c7b49f","src/lib.rs":"6d3662b3f219a3f7a26f6b44b7921a19da1d892cf78f5a4434fdced5753b069f","src/module_translator.rs":"bcdf5a84226b726a73f4be0acb0318ca89c82584460101378e73021d85bd4485","src/sections_translator.rs":"db567511e273a9e383b18a15fc47f74a1247cbe13f120d7656c21660be53ab78","src/state/func_state.rs":"b114522784984a7cc26a3549c7c17f842885e1232254de81d938f9d155f95aa6","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"20082fded6a8d3637eccbda4465355d8d9fab0a1cd8222accb10cb3e06543689","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}

third_party/rust/cranelift-wasm/Cargo.toml (vendored)

@ -1,6 +1,6 @@
[package]
name = "cranelift-wasm"
version = "0.63.0"
version = "0.64.0"
authors = ["The Cranelift Project Developers"]
description = "Translator from WebAssembly to Cranelift IR"
documentation = "https://docs.rs/cranelift-wasm"
@ -12,20 +12,20 @@ keywords = ["webassembly", "wasm"]
edition = "2018"
[dependencies]
wasmparser = { version = "0.51.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.63.0" }
cranelift-frontend = { path = "../frontend", version = "0.63.0", default-features = false }
wasmparser = { version = "0.57.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.64.0" }
cranelift-frontend = { path = "../frontend", version = "0.64.0", default-features = false }
hashbrown = { version = "0.7", optional = true }
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
thiserror = "1.0.4"
[dev-dependencies]
wat = "1.0.9"
wat = "1.0.18"
target-lexicon = "0.10"
# Enable the riscv feature for cranelift-codegen, as some tests require it
cranelift-codegen = { path = "../codegen", version = "0.63.0", default-features = false, features = ["riscv"] }
cranelift-codegen = { path = "../codegen", version = "0.64.0", default-features = false, features = ["riscv"] }
[features]
default = ["std"]


@ -125,7 +125,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
GlobalVariable::Memory { gv, offset, ty } => {
let addr = builder.ins().global_value(environ.pointer_type(), gv);
let flags = ir::MemFlags::trusted();
let val = state.pop1();
let mut val = state.pop1();
// Ensure SIMD values are cast to their default Cranelift type, I8x16.
if ty.is_vector() {
val = optionally_bitcast_vector(val, I8X16, builder);
}
debug_assert_eq!(ty, builder.func.dfg.value_type(val));
builder.ins().store(flags, val, addr, offset);
}
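The added bitcast follows the translator's convention that SIMD values are carried in the default I8X16 shape and reinterpreted where a specific lane type is needed. A toy model of such a "raw bitcast", illustrative only and unrelated to Cranelift's IR:

// Toy model of a "raw bitcast": the same 128 bits reinterpreted under a
// different lane shape (four 32-bit lanes viewed as sixteen byte lanes).
fn i32x4_as_i8x16(v: [u32; 4]) -> [u8; 16] {
    let mut out = [0u8; 16];
    for (i, lane) in v.iter().enumerate() {
        out[i * 4..i * 4 + 4].copy_from_slice(&lane.to_le_bytes());
    }
    out
}

fn main() {
    let bytes = i32x4_as_i8x16([1, 2, 3, 4]);
    assert_eq!(&bytes[0..4], &[1u8, 0, 0, 0]); // same bits, byte-lane view
}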
@ -357,7 +361,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// We signal that all the code that follows until the next End is unreachable
frame.set_branched_to_exit();
let return_count = if frame.is_loop() {
0
frame.num_param_values()
} else {
frame.num_return_values()
};
@ -1035,8 +1039,8 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::F32Le | Operator::F64Le => {
translate_fcmp(FloatCC::LessThanOrEqual, builder, state)
}
Operator::RefNull => state.push1(builder.ins().null(environ.reference_type())),
Operator::RefIsNull => {
Operator::RefNull { ty: _ } => state.push1(builder.ins().null(environ.reference_type())),
Operator::RefIsNull { ty: _ } => {
let arg = state.pop1();
let val = builder.ins().is_null(arg);
let val_int = builder.ins().bint(I32, val);
@ -1167,23 +1171,26 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
)?);
}
Operator::TableGrow { table } => {
let table_index = TableIndex::from_u32(*table);
let delta = state.pop1();
let init_value = state.pop1();
state.push1(environ.translate_table_grow(
builder.cursor(),
*table,
table_index,
delta,
init_value,
)?);
}
Operator::TableGet { table } => {
let table_index = TableIndex::from_u32(*table);
let index = state.pop1();
state.push1(environ.translate_table_get(builder.cursor(), *table, index)?);
state.push1(environ.translate_table_get(builder.cursor(), table_index, index)?);
}
Operator::TableSet { table } => {
let table_index = TableIndex::from_u32(*table);
let value = state.pop1();
let index = state.pop1();
environ.translate_table_set(builder.cursor(), *table, value, index)?;
environ.translate_table_set(builder.cursor(), table_index, value, index)?;
}
Operator::TableCopy {
dst_table: dst_table_index,
@ -1206,10 +1213,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
)?;
}
Operator::TableFill { table } => {
let table_index = TableIndex::from_u32(*table);
let len = state.pop1();
let val = state.pop1();
let dest = state.pop1();
environ.translate_table_fill(builder.cursor(), *table, dest, val, len)?;
environ.translate_table_fill(builder.cursor(), table_index, dest, val, len)?;
}
Operator::TableInit {
segment,
@ -1302,7 +1310,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let ty = type_of(op);
let reduced = builder.ins().ireduce(ty.lane_type(), replacement);
let vector = optionally_bitcast_vector(vector, ty, builder);
state.push1(builder.ins().insertlane(vector, *lane, reduced))
state.push1(builder.ins().insertlane(vector, reduced, *lane))
}
Operator::I32x4ReplaceLane { lane }
| Operator::I64x2ReplaceLane { lane }
@ -1310,7 +1318,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::F64x2ReplaceLane { lane } => {
let (vector, replacement) = state.pop2();
let vector = optionally_bitcast_vector(vector, type_of(op), builder);
state.push1(builder.ins().insertlane(vector, *lane, replacement))
state.push1(builder.ins().insertlane(vector, replacement, *lane))
}
Operator::V8x16Shuffle { lanes, .. } => {
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
@ -1375,7 +1383,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().ineg(a))
}
Operator::I16x8Mul | Operator::I32x4Mul => {
Operator::I16x8Mul | Operator::I32x4Mul | Operator::I64x2Mul => {
let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().imul(a, b))
}
@ -1402,7 +1410,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16Shl | Operator::I16x8Shl | Operator::I32x4Shl | Operator::I64x2Shl => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
@ -1411,16 +1419,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16ShrU | Operator::I16x8ShrU | Operator::I32x4ShrU | Operator::I64x2ShrU => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
state.push1(builder.ins().ushr(bitcast_a, b_mod_bitwidth))
}
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS => {
Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS | Operator::I64x2ShrS => {
let (a, b) = state.pop2();
let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder);
let bitwidth = i64::from(builder.func.dfg.value_type(a).bits());
let bitwidth = i64::from(type_of(op).lane_bits());
// The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width
// we do `b AND 15`; this means fewer instructions than `iconst + urem`.
let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1);
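To make the `b AND (lanewidth - 1)` trick in the comments above concrete: for a power-of-two lane width the mask is exactly the `mod` the spec requires, which is what lets the translator emit `band_imm` instead of `iconst` plus `urem`. A small stand-alone check (plain Rust, not Cranelift code):

// Checks that b & (w - 1) == b % w whenever the lane width w is a power of two,
// which is why the translator emits band_imm rather than iconst + urem.
fn main() {
    for &w in &[8u64, 16, 32, 64] {
        assert!(w.is_power_of_two());
        for b in 0..=200u64 {
            assert_eq!(b & (w - 1), b % w);
        }
    }
    // e.g. shifting 16-bit lanes by 18 actually shifts by 18 & 15 == 2.
    assert_eq!(18u64 & 15, 18u64 % 16);
}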
@ -1435,18 +1443,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// operands must match (hence the bitcast).
state.push1(builder.ins().bitselect(bitcast_c, bitcast_a, bitcast_b))
}
Operator::I8x16AnyTrue
| Operator::I16x8AnyTrue
| Operator::I32x4AnyTrue
| Operator::I64x2AnyTrue => {
Operator::I8x16AnyTrue | Operator::I16x8AnyTrue | Operator::I32x4AnyTrue => {
let a = pop1_with_bitcast(state, type_of(op), builder);
let bool_result = builder.ins().vany_true(a);
state.push1(builder.ins().bint(I32, bool_result))
}
Operator::I8x16AllTrue
| Operator::I16x8AllTrue
| Operator::I32x4AllTrue
| Operator::I64x2AllTrue => {
Operator::I8x16AllTrue | Operator::I16x8AllTrue | Operator::I32x4AllTrue => {
let a = pop1_with_bitcast(state, type_of(op), builder);
let bool_result = builder.ins().vall_true(a);
state.push1(builder.ins().bint(I32, bool_result))
@ -1542,16 +1544,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_from_sint(F32X4, a))
}
Operator::I8x16Mul
| Operator::I64x2Mul
| Operator::I64x2ShrS
| Operator::I32x4TruncSatF32x4S
Operator::I32x4TruncSatF32x4S
| Operator::I32x4TruncSatF32x4U
| Operator::I64x2TruncSatF64x2S
| Operator::I64x2TruncSatF64x2U
| Operator::F32x4ConvertI32x4U
| Operator::F64x2ConvertI64x2S
| Operator::F64x2ConvertI64x2U { .. }
| Operator::I8x16Abs
| Operator::I16x8Abs
| Operator::I32x4Abs
| Operator::I8x16NarrowI16x8S { .. }
| Operator::I8x16NarrowI16x8U { .. }
| Operator::I16x8NarrowI32x4S { .. }
@ -1566,6 +1564,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4WidenHighI16x8U { .. } => {
return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
}
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
}
};
Ok(())
}
@ -1991,8 +1993,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I8x16MinU
| Operator::I8x16MaxS
| Operator::I8x16MaxU
| Operator::I8x16RoundingAverageU
| Operator::I8x16Mul => I8X16,
| Operator::I8x16RoundingAverageU => I8X16,
Operator::I16x8Splat
| Operator::V16x8LoadSplat { .. }
@ -2063,15 +2064,12 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I64x2ExtractLane { .. }
| Operator::I64x2ReplaceLane { .. }
| Operator::I64x2Neg
| Operator::I64x2AnyTrue
| Operator::I64x2AllTrue
| Operator::I64x2Shl
| Operator::I64x2ShrS
| Operator::I64x2ShrU
| Operator::I64x2Add
| Operator::I64x2Sub
| Operator::F64x2ConvertI64x2S
| Operator::F64x2ConvertI64x2U => I64X2,
| Operator::I64x2Mul => I64X2,
Operator::F32x4Splat
| Operator::F32x4ExtractLane { .. }
@ -2111,9 +2109,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::F64x2Mul
| Operator::F64x2Div
| Operator::F64x2Min
| Operator::F64x2Max
| Operator::I64x2TruncSatF64x2S
| Operator::I64x2TruncSatF64x2U => F64X2,
| Operator::F64x2Max => F64X2,
_ => unimplemented!(
"Currently only SIMD instructions are mapped to their return type; the \


@ -6,7 +6,8 @@
//! [Wasmtime]: https://github.com/bytecodealliance/wasmtime
use crate::environ::{
FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment, WasmResult,
FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment,
WasmFuncType, WasmResult,
};
use crate::func_translator::FuncTranslator;
use crate::state::ModuleTranslationState;
@ -433,7 +434,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_grow(
&mut self,
mut pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_delta: ir::Value,
_init_value: ir::Value,
) -> WasmResult<ir::Value> {
@ -443,7 +444,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_index: ir::Value,
) -> WasmResult<ir::Value> {
Ok(pos.ins().null(self.reference_type()))
@ -452,7 +453,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_set(
&mut self,
_pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_value: ir::Value,
_index: ir::Value,
) -> WasmResult<()> {
@ -476,7 +477,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_fill(
&mut self,
_pos: FuncCursor,
_table_index: u32,
_table_index: TableIndex,
_dst: ir::Value,
_val: ir::Value,
_len: ir::Value,
@ -534,7 +535,7 @@ impl TargetEnvironment for DummyEnvironment {
}
impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()> {
fn declare_signature(&mut self, _wasm: &WasmFuncType, sig: ir::Signature) -> WasmResult<()> {
self.info.signatures.push(sig);
Ok(())
}

Some files were not shown because too many files changed in this diff.