Bug 1706427 - Update wasmparser and cranelift. r=rhunt

Differential Revision: https://phabricator.services.mozilla.com/D112806
Yury Delendik 2021-05-04 14:59:40 +00:00
Parent cec8f0915a
Commit 50fe06fce0
286 changed files with 20338 additions and 19747 deletions

View file

@ -47,21 +47,6 @@ git = "https://github.com/mozilla/application-services"
replace-with = "vendored-sources"
rev = "8a576fbe79199fa8664f64285524017f74ebcc5f"
[source."https://github.com/mozilla-spidermonkey/wasmtime"]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
replace-with = "vendored-sources"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
[source."https://github.com/mozilla-spidermonkey/wasm-tools"]
git = "https://github.com/mozilla-spidermonkey/wasm-tools"
replace-with = "vendored-sources"
rev = "1b7763faa484e62752538b78e7a69883f4faceee"
[source."https://github.com/mozilla-spidermonkey/regalloc.rs"]
git = "https://github.com/mozilla-spidermonkey/regalloc.rs"
replace-with = "vendored-sources"
rev = "fc5d1d33317b0fbd36725757f80a95127eff5109"
[source."https://github.com/mozilla-spidermonkey/jsparagus"]
git = "https://github.com/mozilla-spidermonkey/jsparagus"
replace-with = "vendored-sources"
@ -122,6 +107,11 @@ git = "https://github.com/gfx-rs/d3d12-rs"
replace-with = "vendored-sources"
rev = "be19a243b86e0bafb9937d661fc8eabb3e42b44e"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[source."https://github.com/PLSysSec/rlbox_lucet_sandbox/"]
git = "https://github.com/PLSysSec/rlbox_lucet_sandbox/"
replace-with = "vendored-sources"

Cargo.lock (generated)
View file

@ -779,42 +779,42 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
]
[[package]]
name = "cranelift-codegen"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"byteorder",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
"log",
"regalloc",
"smallvec",
"target-lexicon 0.11.0",
"target-lexicon 0.12.0",
"thiserror",
]
[[package]]
name = "cranelift-codegen-meta"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[[package]]
name = "cranelift-entity"
@ -823,33 +823,33 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=cd07861d1c9
[[package]]
name = "cranelift-entity"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[[package]]
name = "cranelift-frontend"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen",
"log",
"smallvec",
"target-lexicon 0.11.0",
"target-lexicon 0.12.0",
]
[[package]]
name = "cranelift-wasm"
version = "0.68.0"
source = "git+https://github.com/mozilla-spidermonkey/wasmtime?rev=a25399760e9f12b679aa267dd2af7cfedc72bb71#a25399760e9f12b679aa267dd2af7cfedc72bb71"
version = "0.73.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=6b77786a6e758e91da9484a1c80b6fa5f88e1b3d#6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.68.0",
"cranelift-entity 0.73.0",
"cranelift-frontend",
"itertools 0.9.0",
"itertools 0.10.0",
"log",
"smallvec",
"thiserror",
"wasmparser 0.67.0",
"wasmparser",
]
[[package]]
@ -2539,9 +2539,9 @@ dependencies = [
[[package]]
name = "itertools"
version = "0.9.0"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319"
dependencies = [
"either",
]
@ -2648,7 +2648,7 @@ version = "0.1.0"
dependencies = [
"jsrust_shared",
"mozglue-static",
"wasmparser 0.48.2",
"wasmparser",
"wat",
]
@ -4253,7 +4253,8 @@ dependencies = [
[[package]]
name = "regalloc"
version = "0.0.31"
source = "git+https://github.com/mozilla-spidermonkey/regalloc.rs?rev=fc5d1d33317b0fbd36725757f80a95127eff5109#fc5d1d33317b0fbd36725757f80a95127eff5109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "571f7f397d61c4755285cd37853fe8e03271c243424a907415909379659381c5"
dependencies = [
"log",
"rustc-hash",
@ -5078,9 +5079,9 @@ checksum = "6f4c118a7a38378f305a9e111fcb2f7f838c0be324bfb31a77ea04f7f6e684b4"
[[package]]
name = "target-lexicon"
version = "0.11.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe2635952a442a01fd4cb53d98858b5e4bb461b02c0d111f22f31772e3e7a8b2"
checksum = "64ae3b39281e4b14b8123bdbaddd472b7dfe215e444181f2f9d2443c2444f834"
[[package]]
name = "tempfile"
@ -5680,20 +5681,15 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasmparser"
version = "0.48.2"
version = "0.77.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
[[package]]
name = "wasmparser"
version = "0.67.0"
source = "git+https://github.com/mozilla-spidermonkey/wasm-tools?rev=1b7763faa484e62752538b78e7a69883f4faceee#1b7763faa484e62752538b78e7a69883f4faceee"
checksum = "b35c86d22e720a07d954ebbed772d01180501afe7d03d464f413bb5f8914a8d6"
[[package]]
name = "wast"
version = "35.0.1"
version = "35.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a5800e9f86a1eae935e38bea11e60fd253f6d514d153fb39b3e5535a7b37b56"
checksum = "2ef140f1b49946586078353a453a1d28ba90adfc54dde75710bc1931de204d68"
dependencies = [
"leb128",
]

View file

@ -86,12 +86,12 @@ minidump_writer_linux = { git = "https://github.com/msirringhaus/minidump_writer
xmldecl = { git = "https://github.com/hsivonen/xmldecl", rev="a74f7df5bf6cb11194fb74daa0c3cf42f326fd90" }
[patch.crates-io.cranelift-codegen]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
git = "https://github.com/bytecodealliance/wasmtime"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
[patch.crates-io.cranelift-wasm]
git = "https://github.com/mozilla-spidermonkey/wasmtime"
rev = "a25399760e9f12b679aa267dd2af7cfedc72bb71"
git = "https://github.com/bytecodealliance/wasmtime"
rev = "6b77786a6e758e91da9484a1c80b6fa5f88e1b3d"
# Patch autocfg to hide rustc output. Workaround for https://github.com/cuviper/autocfg/issues/30
[patch.crates-io.autocfg]

View file

@ -9,18 +9,18 @@
//
// is equivalent to
//
// (iMxN.mul (iMxN.widen_{high,low}_iKxL_{s,u} A)
// (iMxN.widen_{high,low}_iKxL_{s,u} B))
// (iMxN.mul (iMxN.extend_{high,low}_iKxL_{s,u} A)
// (iMxN.extend_{high,low}_iKxL_{s,u} B))
//
// It doesn't really matter what the inputs are, we can test this almost
// blindly.
//
// Unfortunately, we do not yet have i64x2.widen_* so we introduce a helper
// Unfortunately, we do not yet have i64x2.extend_* so we introduce a helper
// function to compute that.
function makeExtMulTest(wide, narrow, part, signed) {
let widener = (wide == 'i64x2') ?
`call $${wide}_widen_${part}_${narrow}_${signed}` :
`call $${wide}_extend_${part}_${narrow}_${signed}` :
`${wide}.extend_${part}_${narrow}_${signed}`;
return `
(func (export "${wide}_extmul_${part}_${narrow}_${signed}")
@ -36,21 +36,21 @@ function makeExtMulTest(wide, narrow, part, signed) {
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func $i64x2_widen_low_i32x4_s (param v128) (result v128)
(func $i64x2_extend_low_i32x4_s (param v128) (result v128)
(i64x2.shr_s (i8x16.shuffle 16 16 16 16 0 1 2 3 16 16 16 16 4 5 6 7
(local.get 0)
(v128.const i32x4 0 0 0 0))
(i32.const 32)))
(func $i64x2_widen_high_i32x4_s (param v128) (result v128)
(func $i64x2_extend_high_i32x4_s (param v128) (result v128)
(i64x2.shr_s (i8x16.shuffle 16 16 16 16 8 9 10 11 16 16 16 16 12 13 14 15
(local.get 0)
(v128.const i32x4 0 0 0 0))
(i32.const 32)))
(func $i64x2_widen_low_i32x4_u (param v128) (result v128)
(func $i64x2_extend_low_i32x4_u (param v128) (result v128)
(i8x16.shuffle 0 1 2 3 16 16 16 16 4 5 6 7 16 16 16 16
(local.get 0)
(v128.const i32x4 0 0 0 0)))
(func $i64x2_widen_high_i32x4_u (param v128) (result v128)
(func $i64x2_extend_high_i32x4_u (param v128) (result v128)
(i8x16.shuffle 8 9 10 11 16 16 16 16 12 13 14 15 16 16 16 16
(local.get 0)
(v128.const i32x4 0 0 0 0)))
@ -132,13 +132,13 @@ assertEq(ins.exports.const_bitmask_i64x2(), 1);
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func (export "widen_low_i32x4_s")
(func (export "extend_low_i32x4_s")
(v128.store (i32.const 0) (i64x2.extend_low_i32x4_s (v128.load (i32.const 16)))))
(func (export "widen_high_i32x4_s")
(func (export "extend_high_i32x4_s")
(v128.store (i32.const 0) (i64x2.extend_high_i32x4_s (v128.load (i32.const 16)))))
(func (export "widen_low_i32x4_u")
(func (export "extend_low_i32x4_u")
(v128.store (i32.const 0) (i64x2.extend_low_i32x4_u (v128.load (i32.const 16)))))
(func (export "widen_high_i32x4_u")
(func (export "extend_high_i32x4_u")
(v128.store (i32.const 0) (i64x2.extend_high_i32x4_u (v128.load (i32.const 16))))))`);
var mem32 = new Int32Array(ins.exports.mem.buffer);
@ -148,16 +148,16 @@ var mem64u = new BigUint64Array(ins.exports.mem.buffer);
var as = [205, 1, 192, 3].map((x) => x << 24);
set(mem32, 4, as);
ins.exports.widen_low_i32x4_s();
ins.exports.extend_low_i32x4_s();
assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n])))
ins.exports.widen_high_i32x4_s();
ins.exports.extend_high_i32x4_s();
assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n+2])));
ins.exports.widen_low_i32x4_u();
ins.exports.extend_low_i32x4_u();
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
ins.exports.widen_high_i32x4_u();
ins.exports.extend_high_i32x4_u();
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
// Saturating rounding q-format multiplication.
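The identity exercised by these tests, in scalar form: extended multiply widens each lane before multiplying, so the product of two signed 32-bit lanes is computed exactly in 64 bits. A minimal Rust model (function name hypothetical):

// Scalar model of one i64x2.extmul_*_i32x4_s lane: widen, then multiply.
fn extmul_lane_s(a: i32, b: i32) -> i64 {
    (a as i64) * (b as i64) // cannot overflow: |a|, |b| < 2^31
}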

View file

@ -21,4 +21,4 @@ jsrust_shared = { path = "./shared" }
# Workaround for https://github.com/rust-lang/rust/issues/58393
mozglue-static = { path = "../../../mozglue/static/rust" }
wat = { version = "1.0.37" }
wasmparser = { version = "0.48.2" }
wasmparser = { version = "0.77.0" }

View file

@ -56,20 +56,31 @@ pub unsafe extern "C" fn wasm_code_offsets(
}
let mut offsets = Vec::new();
let mut parser = Parser::new(bytes);
let mut next_input = ParserInput::Default;
while !parser.eof() {
let offset = parser.current_position();
match parser.read_with_input(next_input) {
ParserState::BeginSection { code, .. } if *code != SectionCode::Code => {
next_input = ParserInput::SkipSection;
// Read operators offsets and skip invalid data.
for payload in Parser::new(0).parse_all(bytes) {
if payload.is_err() {
break;
}
match payload.unwrap() {
Payload::CodeSectionEntry(body) => {
let reader = match body.get_operators_reader() {
Ok(r) => r,
Err(_) => {
break;
}
};
for pair in reader.into_iter_with_offsets() {
let offset = match pair {
Ok((_op, offset)) => offset,
Err(_) => {
break;
}
};
offsets.push(offset as u32);
}
}
ParserState::CodeOperator(..) => {
offsets.push(offset as u32);
next_input = ParserInput::Default
}
_ => next_input = ParserInput::Default,
_ => (),
}
}
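For reference, a minimal standalone sketch of the event-driven loop this rewrite adopts, assuming the wasmparser 0.77 API pinned above (`operator_offsets` is a hypothetical wrapper name):

use wasmparser::{Parser, Payload};

fn operator_offsets(bytes: &[u8]) -> Vec<u32> {
    let mut offsets = Vec::new();
    // Parser::new(0) starts at byte offset 0; parse_all yields Payload events.
    for payload in Parser::new(0).parse_all(bytes) {
        match payload {
            // One event per function body in the code section.
            Ok(Payload::CodeSectionEntry(body)) => {
                if let Ok(reader) = body.get_operators_reader() {
                    // Each item pairs a decoded operator with its byte offset.
                    for pair in reader.into_iter_with_offsets() {
                        match pair {
                            Ok((_op, offset)) => offsets.push(offset as u32),
                            Err(_) => return offsets, // skip invalid data
                        }
                    }
                }
            }
            Ok(_) => (),     // other payloads are ignored here
            Err(_) => break, // stop at the first parse error
        }
    }
    offsets
}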

View file

@ -13,8 +13,8 @@ name = "baldrdash"
# cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
# you should update the following $TOP_LEVEL/Cargo.toml file: look for the
# revision (rev) hashes of both cranelift dependencies (codegen and wasm).
cranelift-codegen = { version = "0.68.0", default-features = false }
cranelift-wasm = { version = "0.68.0" }
cranelift-codegen = { version = "0.73.0", default-features = false }
cranelift-wasm = { version = "0.73.0" }
log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
env_logger = "0.8"
smallvec = "1.0"
@ -24,7 +24,7 @@ bindgen = {version = "0.56", default-features = false} # disable `logging` to re
[features]
default = ['cranelift-codegen/std']
cranelift_x86 = ['cranelift-codegen/x64']
cranelift_x86 = ['cranelift-codegen/x86']
cranelift_arm32 = ['cranelift-codegen/arm32']
cranelift_arm64 = ['cranelift-codegen/arm64']

View file

@ -380,6 +380,9 @@ impl<'module> wasmparser::WasmModuleResources for ModuleEnvironment<'module> {
None
}
}
fn event_at(&self, _at: u32) -> Option<&Self::FuncType> {
panic!("unexpected exception operation");
}
fn global_at(&self, at: u32) -> Option<wasmparser::GlobalType> {
let num_globals = unsafe { low_level::env_num_globals(self.env) };
if (at as usize) < num_globals {

View file

@ -174,6 +174,7 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
deterministic_only: true,
memory64: false,
multi_memory: false,
exceptions: false,
};
let sig_index = self.module_env.func_sig_index(index);
let mut validator =
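wasmparser 0.77's WasmFeatures struct grew an `exceptions` flag, so validator construction must now state a position on the exception-handling proposal. A hedged sketch, assuming this revision of wasmparser implements Default for WasmFeatures (if not, every field must be spelled out, as the hunk above does):

use wasmparser::WasmFeatures;

// Baldrdash keeps exception handling off for now.
let features = WasmFeatures {
    deterministic_only: true,
    memory64: false,
    multi_memory: false,
    exceptions: false,
    ..Default::default()
};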

View file

@ -1390,7 +1390,8 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
Ok(ret.unwrap())
}
fn translate_loop_header(&mut self, mut pos: FuncCursor) -> WasmResult<()> {
fn translate_loop_header(&mut self, builder: &mut FunctionBuilder) -> WasmResult<()> {
let mut pos = builder.cursor();
let interrupt = self.load_interrupt_flag(&mut pos);
pos.ins()
.resumable_trapnz(interrupt, ir::TrapCode::Interrupt);

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"b0ed8fc54833fd48846644e3f59fbead46e7a2ff456194e03d04ce8b95404522","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"4204f6bd3dd43dc307a57dc1b3543fc3d31feb4c5c8e64035578a45d88c725b3","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"7ed6181651b03736af571db011b0c22da1accd1ed581c4637048baeb31ac4460","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"4204f6bd3dd43dc307a57dc1b3543fc3d31feb4c5c8e64035578a45d88c725b3","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}

View file

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.68.0"
version = "0.73.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"
[dependencies]
cranelift-entity = { path = "../entity", version = "0.68.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.73.0", default-features = false }
[badges]
maintenance = { status = "experimental" }

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"561ee9a55739ac9716bc2f024e2673d69aefa6edbc4ff8b61a221a1741ed862a","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"a0f5212fa593caf66371f5ee4b15e501939a9407c4663bff6b3ba356b11ca1b4","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"50620fb2a6271a7c9126dc30c433a1bf25646a4d84511f5745650aaaec700f42","src/cdsl/typevar.rs":"3cbe83a09d2402511b20415a8356f848fb82536926386bb42eaaa7740fb2457e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"1ff123ab481b48d82e13363043dfc98eaef837bbf6af485b8259c3863550e29c","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"e9f1645fec6e4b5cfba9b08cfff70f9d1a5ad3b392f5ee9f40cb1a8669a7c689","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"f2d3d1ece43c7f18bd7ef405715cd39f59433d8f33a7fa4d237c1de28528ff7c","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"745ef09f4927b5334d68155fa047910ef96311feef7ec20964bb033c3419cd3c","src/isa/x86/recipes.rs":"744292109344363b2210ac1b42cb4704b4b692aa8bf5583e4230557cf3749298","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"47a5e9fb3b7917cfe817d56dcc77c0470545e451e0f38a875af0531fbd9b6a58","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"14b668244b2afd71197c2dd8469af0e0602d590fcb14252c2b0b40cb9905a4ae","src/shared/immediates.rs":"563fa33accb992eb11a43f0f63259c62a2c44db59801431cc67ceec4b94f2ca3","src/shared/instructions.rs":"21d0f2b041a0bce64d3db614ca003ec9269ba0a31aa5dbdae34cb15e5a59d89f","src/shared/legalize.rs":"eb5f07fa107cadd67483881ccce29cc8fb9b698a0cd4f1d89853aac275cf7bcf","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"e7406ce17fb313fa05397dd8103f74eed67d35170d70b6e546e08954aef2ed87","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"122ed61f7a5d7859ae59f4969444d5b4b3cdf6c035e4aca8040aa93b393601e8","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"b01e6d51991c6bcc15b40c90d53a1bf9c7ecbc046f7fd1fea1798097db26ffb4","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"b2f2c80a8d24cc9523e3d83219fc3251f24898579a6831e45b7fe34ab74b6207","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"9e4f9aebbc81da3bef61ad4fa4f3be38f5494747b465d2cd95c269cdecb06e09","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"e61f37f6185082dcf41cde9e7edba16c5161dbe40cf40580cd7a6973ed8badbc","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"5bee86362ecb06bf974e1bf79f9fb3e50e81a829b311baf660f0a0c64ea65fdb","src/cdsl/type_inference.rs":"b0834b54176811f3c63a76ccb0114c05edd16173088501f794521ae7a1ac443d","src/cdsl/types.rs":"50620fb2a6271a7c9126dc30c433a1bf25646a4d84511f5745650aaaec700f42","src/cdsl/typevar.rs":"3cbe83a09d2402511b20415a8356f848fb82536926386bb42eaaa7740fb2457e","src/cdsl/xform.rs":"10760ea733d0462e2bd3ef636f657fa1817148761b1de6ffbfe0af3222a66438","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"3695066b8b58066a2f3959bb37a5464732a38dc10aebd65711ab6a1b26530a52","src/gen_inst.rs":"1f2eb68d2fca38b9e4b4f28125c5ea943efb2e1f1d927ada0d08a16937da1aba","src/gen_legalizer.rs":"a5e507eb46649a28252582cfc1907c77c9266fec7f92e959a03258bed7d124e9","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"a9001b09a60b28f63baeca6bac63781f48f4d68f65c8105ace1aedcd964b8468","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"a069c34d1fadc9b35aeafbf72cfd89140e5fa8b9136fc51118241591833a5cde","src/isa/mod.rs":"be483f9a406f603e69603f9489a41a53ee02aa0ece07f7ca396956dfe3815f71","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"fba8fdd5d1ebef9cb34f0948f285cd3c63eed498e21bad100a69e316f961b737","src/isa/riscv/recipes.rs":"0f58141903aeb3a76a33b705e2dea3eb74864c42dd8b719d9e4f4f95ad0c5d80","src/isa/x86/encodings.rs":"83619a4b49da1eb7a946e2348f1c232cfc853c23387bab219e488b1118754085","src/isa/x86/instructions.rs":"d4d581448f8f7bd5afb033650af0026468eecc6f4184b3bb7c06232bf08c456b","src/isa/x86/legalize.rs":"186c688dd8ac773f2b2c4c1f1cbdb7a66ca13a8ed90c03f87dfe7fdaa12c15b3","src/isa/x86/mod.rs":"31571c281318e6f9bf17680feb96830983f5c1f9811aa4a89736f99f3d9a1831","src/isa/x86/opcodes.rs":"c1a6e6657c4970f0ecb9287d835e9c0791908db1d10d0d35bb4899779a0d14f8","src/isa/x86/recipes.rs":"744292109344363b2210ac1b42cb4704b4b692aa8bf5583e4230557cf3749298","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"0c5bca85724e51d87ed431b2f783d9352d510ccc42b59170c808a5a041836649","src/lib.rs":"23259ba28aa8f0b3586e9c60f4e67ae50660369f146f2a94249e8cff7d07b27b","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"14b668244b2afd71197c2dd8469af0e0602d590fcb14252c2b0b40cb9905a4ae","src/shared/immediates.rs":"42793948a4a84058059d39212236c10d46efa7f69b21d904735343525819209d","src/shared/instructions.rs":"b328e34c28d86046387372f84551c7ceeb230f6e82d044fba914bf80e2a183f5","src/shared/legalize.rs":"eb5f07fa107cadd67483881ccce29cc8fb9b698a0cd4f1d89853aac275cf7bcf","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"10602e7f4ac1fa307a178490c7e531760d001c1f3a416adacd4458e6ac34ee67","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}

View file

@ -1,19 +1,20 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.68.0"
version = "0.73.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
# FIXME(rust-lang/cargo#9300): uncomment once that lands
# [package.metadata.docs.rs]
# rustdoc-args = [ "--document-private-items" ]
[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.68.0" }
cranelift-entity = { path = "../../entity", version = "0.68.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.73.0" }
cranelift-entity = { path = "../../entity", version = "0.73.0" }
[badges]
maintenance = { status = "experimental" }

View file

@ -296,7 +296,8 @@ impl Var {
pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) {
assert!(
self.get_def(position).is_none(),
format!("redefinition of variable {}", self.name)
"redefinition of variable {}",
self.name
);
match position {
PatternPosition::Source => {
@ -461,7 +462,8 @@ impl Apply {
// Basic check on number of arguments.
assert!(
inst.operands_in.len() == args.len(),
format!("incorrect number of arguments in instruction {}", inst.name)
"incorrect number of arguments in instruction {}",
inst.name
);
// Check that the kinds of Literals arguments match the expected operand.
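This and the following hunks track a change in newer Rust: assert! and panic! no longer accept a pre-built format!() value as the message (non-literal panic messages became an error in the 2021 edition), so the format string and its arguments are passed directly instead. A minimal sketch (names hypothetical):

fn check_arity(ok: bool, inst_name: &str) {
    // Before: assert!(ok, format!("incorrect number of arguments in instruction {}", inst_name));
    // After: hand the format string and its arguments straight to assert!.
    assert!(ok, "incorrect number of arguments in instruction {}", inst_name);
}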

View file

@ -153,10 +153,9 @@ impl EncodingBuilder {
let inst = self.inst.inst();
assert!(
Rc::ptr_eq(&inst.format, &recipes[self.recipe].format),
format!(
"Inst {} and recipe {} must have the same format!",
inst.name, recipes[self.recipe].name
)
"Inst {} and recipe {} must have the same format!",
inst.name,
recipes[self.recipe].name
);
assert_eq!(

View file

@ -394,7 +394,7 @@ impl ValueTypeOrAny {
pub fn expect(self, msg: &str) -> ValueType {
match self {
ValueTypeOrAny::ValueType(vt) => vt,
ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)),
ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg),
}
}
}
@ -665,7 +665,7 @@ fn verify_polymorphic(
if operands_out.is_empty() {
// No result means no other possible type variable, so it's a type inference failure.
match maybe_error_message {
Some(msg) => panic!(msg),
Some(msg) => panic!("{}", msg),
None => panic!("typevar_operand must be a free type variable"),
}
}

View file

@ -260,10 +260,9 @@ impl EncodingRecipeBuilder {
if !self.format.has_value_list {
assert!(
operands_in.len() == self.format.num_value_operands,
format!(
"missing operand constraints for recipe {} (format {})",
self.name, self.format.name
)
"missing operand constraints for recipe {} (format {})",
self.name,
self.format.name
);
}

View file

@ -20,6 +20,7 @@ pub(crate) enum SpecificSetting {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Setting {
pub name: &'static str,
pub description: &'static str,
pub comment: &'static str,
pub specific: SpecificSetting,
pub byte_offset: u8,
@ -88,6 +89,7 @@ impl Into<PresetType> for PresetIndex {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Preset {
pub name: &'static str,
pub description: &'static str,
values: Vec<BoolSettingIndex>,
}
@ -169,6 +171,7 @@ pub(crate) enum ProtoSpecificSetting {
/// This is the information provided during building for a setting.
struct ProtoSetting {
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
}
@ -251,11 +254,13 @@ impl SettingGroupBuilder {
fn add_setting(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
) {
self.settings.push(ProtoSetting {
name,
description,
comment,
specific,
})
@ -264,6 +269,7 @@ impl SettingGroupBuilder {
pub fn add_bool(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: bool,
) -> BoolSettingIndex {
@ -271,28 +277,55 @@ impl SettingGroupBuilder {
self.predicates.is_empty(),
"predicates must be added after the boolean settings"
);
self.add_setting(name, comment, ProtoSpecificSetting::Bool(default));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Bool(default),
);
BoolSettingIndex(self.settings.len() - 1)
}
pub fn add_enum(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
values: Vec<&'static str>,
) {
self.add_setting(name, comment, ProtoSpecificSetting::Enum(values));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Enum(values),
);
}
pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) {
self.add_setting(name, comment, ProtoSpecificSetting::Num(default));
pub fn add_num(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: u8,
) {
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Num(default),
);
}
pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) {
self.predicates.push(ProtoPredicate { name, node });
}
pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex {
pub fn add_preset(
&mut self,
name: &'static str,
description: &'static str,
args: Vec<PresetType>,
) -> PresetIndex {
let mut values = Vec::new();
for arg in args {
match arg {
@ -302,7 +335,11 @@ impl SettingGroupBuilder {
PresetType::BoolSetting(index) => values.push(index),
}
}
self.presets.push(Preset { name, values });
self.presets.push(Preset {
name,
description,
values,
});
PresetIndex(self.presets.len() - 1)
}
@ -347,6 +384,7 @@ impl SettingGroupBuilder {
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset,
specific,
@ -367,6 +405,7 @@ impl SettingGroupBuilder {
};
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset: byte_offset + predicate_number / 8,
specific: SpecificSetting::Bool(BoolSetting {
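With this change every setting and preset carries a short description alongside the long-form comment, so all builder call sites gain one argument. A sketch of the updated call shape, mirroring the x86 call sites later in this patch:

let mut settings = SettingGroupBuilder::new("x86");
// add_bool now takes (name, description, comment, default).
let has_sse3 = settings.add_bool(
    "has_sse3",
    "Has support for SSE3.",           // new one-line description
    "SSE3: CPUID.01H:ECX.SSE3[bit 0]", // pre-existing long comment
    false,
);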

View file

@ -210,7 +210,8 @@ impl TypeEnvironment {
None => {
assert!(
!actual_tv.name.starts_with("typeof_"),
format!("variable {} should be explicitly ranked", actual_tv.name)
"variable {} should be explicitly ranked",
actual_tv.name
);
None
}

View file

@ -74,19 +74,18 @@ impl Transform {
for &var_index in &input_vars {
assert!(
var_pool.get(var_index).is_input(),
format!("'{:?}' used as both input and def", var_pool.get(var_index))
"'{:?}' used as both input and def",
var_pool.get(var_index)
);
}
assert!(
input_vars.len() == num_src_inputs,
format!(
"extra input vars in dst pattern: {:?}",
input_vars
.iter()
.map(|&i| var_pool.get(i))
.skip(num_src_inputs)
.collect::<Vec<_>>()
)
"extra input vars in dst pattern: {:?}",
input_vars
.iter()
.map(|&i| var_pool.get(i))
.skip(num_src_inputs)
.collect::<Vec<_>>()
);
// Perform type inference and cleanup.
@ -143,7 +142,8 @@ impl Transform {
let defined_var = self.var_pool.get(var_index);
assert!(
defined_var.is_output(),
format!("{:?} not defined in the destination pattern", defined_var)
"{:?} not defined in the destination pattern",
defined_var
);
}
}
@ -226,7 +226,8 @@ fn rewrite_expr(
let var = var_pool.get(own_var);
assert!(
var.is_input() || var.get_def(position).is_some(),
format!("{:?} used as both input and def", var)
"{:?} used as both input and def",
var
);
args.push(Expr::Var(own_var));
}
@ -400,10 +401,8 @@ impl TransformGroupBuilder {
self.custom_legalizes
.insert(inst.camel_name.clone(), func_name)
.is_none(),
format!(
"custom legalization action for {} inserted twice",
inst.name
)
"custom legalization action for {} inserted twice",
inst.name
);
}
@ -442,7 +441,8 @@ impl TransformGroups {
for group in self.groups.values() {
assert!(
group.name != new_group.name,
format!("trying to insert {} for the second time", new_group.name)
"trying to insert {} for the second time",
new_group.name
);
}
self.groups.push(new_group)
@ -459,7 +459,7 @@ impl TransformGroups {
return group;
}
}
panic!(format!("transform group with name {} not found", name));
panic!("transform group with name {} not found", name);
}
}

View file

@ -99,8 +99,8 @@ fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter)
Some(previous_format_name) => {
assert!(
previous_format_name == leaf_format_name,
format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name
));
"Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name
);
}
}
}

View file

@ -68,6 +68,7 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
/// `ValueList` to store the additional information out of line.
fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
fmt.line("#[derive(Clone, Debug)]");
fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
fmt.line("#[allow(missing_docs)]");
fmt.line("pub enum InstructionData {");
fmt.indent(|fmt| {
@ -410,7 +411,10 @@ fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
fmt.line("#[repr(u16)]");
fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]");
fmt.line(
r#"#[cfg_attr(feature = "enable-peepmatic", derive(serde::Serialize, serde::Deserialize))]"#
r#"#[cfg_attr(
any(feature = "enable-peepmatic", feature = "enable-serde"),
derive(serde::Serialize, serde::Deserialize)
)]"#,
);
// We explicitly set the discriminant of the first variant to 1, which allows us to take

View file

@ -70,6 +70,33 @@ fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatte
fmtln!(fmt, "}");
}
/// Generates the `iter` function.
fn gen_iterator(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "impl Flags {");
fmt.indent(|fmt| {
fmt.doc_comment("Iterates the setting values.");
fmtln!(fmt, "pub fn iter(&self) -> impl Iterator<Item = Value> {");
fmt.indent(|fmt| {
fmtln!(fmt, "let mut bytes = [0; {}];", group.settings_size);
fmtln!(fmt, "bytes.copy_from_slice(&self.bytes[0..{}]);", group.settings_size);
fmtln!(fmt, "DESCRIPTORS.iter().filter_map(move |d| {");
fmt.indent(|fmt| {
fmtln!(fmt, "let values = match &d.detail {");
fmt.indent(|fmt| {
fmtln!(fmt, "detail::Detail::Preset => return None,");
fmtln!(fmt, "detail::Detail::Enum { last, enumerators } => Some(TEMPLATE.enums(*last, *enumerators)),");
fmtln!(fmt, "_ => None");
});
fmtln!(fmt, "};");
fmtln!(fmt, "Some(Value{ name: d.name, detail: d.detail, values, value: bytes[d.offset as usize] })");
});
fmtln!(fmt, "})");
});
fmtln!(fmt, "}");
});
fmtln!(fmt, "}");
}
/// Emit Display and FromStr implementations for enum settings.
fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
fmtln!(fmt, "impl fmt::Display for {} {{", name);
@ -136,7 +163,7 @@ fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) {
/// Emit a getter function for `setting`.
fn gen_getter(setting: &Setting, fmt: &mut Formatter) {
fmt.doc_comment(setting.comment);
fmt.doc_comment(format!("{}\n{}", setting.description, setting.comment));
match setting.specific {
SpecificSetting::Bool(BoolSetting {
predicate_number, ..
@ -254,6 +281,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", setting.name);
fmtln!(fmt, "description: \"{}\",", setting.description);
fmtln!(fmt, "offset: {},", setting.byte_offset);
match setting.specific {
SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
@ -286,6 +314,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", preset.name);
fmtln!(fmt, "description: \"{}\",", preset.description);
fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
fmtln!(fmt, "detail: detail::Detail::Preset,");
});
@ -418,7 +447,7 @@ fn gen_display(group: &SettingGroup, fmt: &mut Formatter) {
fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
// Generate struct.
fmtln!(fmt, "#[derive(Clone)]");
fmtln!(fmt, "#[derive(Clone, Hash)]");
fmt.doc_comment(format!("Flags group `{}`.", group.name));
fmtln!(fmt, "pub struct Flags {");
fmt.indent(|fmt| {
@ -427,6 +456,7 @@ fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
fmtln!(fmt, "}");
gen_constructor(group, parent, fmt);
gen_iterator(group, fmt);
gen_enum_types(group, fmt);
gen_getters(group, fmt);
gen_descriptors(group, fmt);

View file

@ -8,7 +8,10 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
use crate::shared::Definitions as SharedDefinitions;
fn define_settings(_shared: &SettingGroup) -> SettingGroup {
let setting = SettingGroupBuilder::new("arm64");
let mut setting = SettingGroupBuilder::new("arm64");
let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false);
setting.add_predicate("use_lse", predicate!(has_lse));
setting.build()
}

View file

@ -17,33 +17,39 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
let supports_m = setting.add_bool(
"supports_m",
"CPU supports the 'M' extension (mul/div)",
"",
false,
);
let supports_a = setting.add_bool(
"supports_a",
"CPU supports the 'A' extension (atomics)",
"",
false,
);
let supports_f = setting.add_bool(
"supports_f",
"CPU supports the 'F' extension (float)",
"",
false,
);
let supports_d = setting.add_bool(
"supports_d",
"CPU supports the 'D' extension (double)",
"",
false,
);
let enable_m = setting.add_bool(
"enable_m",
"Enable the use of 'M' instructions if available",
"",
true,
);
setting.add_bool(
"enable_e",
"Enable the 'RV32E' instruction set with only 16 registers",
"",
false,
);

View file

@ -25,7 +25,8 @@ impl RecipeGroup {
fn push(&mut self, builder: EncodingRecipeBuilder) {
assert!(
self.name_to_recipe.get(&builder.name).is_none(),
format!("riscv recipe '{}' created twice", builder.name)
"riscv recipe '{}' created twice",
builder.name
);
let name = builder.name.clone();
let number = self.recipes.push(builder.build());

View file

@ -45,10 +45,8 @@ impl PerCpuModeEncodings {
if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
assert!(
self.recipes[*found_index] == recipe,
format!(
"trying to insert different recipes with a same name ({})",
recipe.name
)
"trying to insert different recipes with a same name ({})",
recipe.name
);
*found_index
} else {
@ -549,10 +547,13 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
}
e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());
// You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be
// on 8-bit operands (f.ex "xor %al, %al"). Cranelift currently does not know when it can
// safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode.
let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
e.enc_both_instp(
iconst.bind(I8),
rec_u_id_z.opcodes(&XORB),
rec_u_id_z.opcodes(&XOR),
is_zero_int.clone(),
);
@ -1688,6 +1689,7 @@ fn define_simd(
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s");
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
let x86_insertps = x86.by_name("x86_insertps");
let x86_fmax = x86.by_name("x86_fmax");
@ -2210,6 +2212,9 @@ fn define_simd(
// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
// SIMD multiplication and add adjacent pairs, from SSE2.
e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD));
// SIMD integer multiplication for I64x2 using a AVX512.
{
e.enc_32_64_maybe_isap(

View file

@ -396,7 +396,6 @@ fn define_simd(
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
let ishl_imm = insts.by_name("ishl_imm");
let load_splat = insts.by_name("load_splat");
let raw_bitcast = insts.by_name("raw_bitcast");
let scalar_to_vector = insts.by_name("scalar_to_vector");
let splat = insts.by_name("splat");
@ -821,7 +820,6 @@ fn define_simd(
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
narrow.custom_legalize(fmin, "expand_minmax_vector");
narrow.custom_legalize(fmax, "expand_minmax_vector");
narrow.custom_legalize(load_splat, "expand_load_splat");
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");

View file

@ -508,6 +508,9 @@ pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
/// Multiply the packed word integers, add adjacent doubleword results.
pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];
@ -711,9 +714,6 @@ pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// r/m{16,32,64} XOR register of the same size.
pub static XOR: [u8; 1] = [0x31];
/// r/m8 XOR r8.
pub static XORB: [u8; 1] = [0x30];
/// Bitwise logical XOR of packed double-precision floating-point values.
pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];

View file

@ -4,37 +4,77 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false);
let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false);
let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false);
let has_sse3 = settings.add_bool(
"has_sse3",
"Has support for SSE3.",
"SSE3: CPUID.01H:ECX.SSE3[bit 0]",
false,
);
let has_ssse3 = settings.add_bool(
"has_ssse3",
"Has support for SSSE3.",
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
false,
);
let has_sse41 = settings.add_bool(
"has_sse41",
"Has support for SSE4.1.",
"SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]",
false,
);
let has_sse42 = settings.add_bool(
"has_sse42",
"Has support for SSE4.2.",
"SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]",
false,
);
let has_avx = settings.add_bool(
"has_avx",
"Has support for AVX.",
"AVX: CPUID.01H:ECX.AVX[bit 28]",
false,
);
let has_avx2 = settings.add_bool(
"has_avx2",
"Has support for AVX2.",
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
false,
);
let has_avx512dq = settings.add_bool(
"has_avx512dq",
"Has support for AVX512DQ.",
"AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]",
false,
);
let has_avx512vl = settings.add_bool(
"has_avx512vl",
"Has support for AVX512VL.",
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
false,
);
let has_avx512f = settings.add_bool(
"has_avx512f",
"Has support for AVX512F.",
"AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
false,
);
let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
let has_popcnt = settings.add_bool(
"has_popcnt",
"Has support for POPCNT.",
"POPCNT: CPUID.01H:ECX.POPCNT[bit 23]",
false,
);
// CPUID.(EAX=07H, ECX=0H):EBX
let has_bmi1 = settings.add_bool(
"has_bmi1",
"Has support for BMI1.",
"BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
false,
);
let has_bmi2 = settings.add_bool(
"has_bmi2",
"Has support for BMI2.",
"BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
false,
);
@ -42,6 +82,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// CPUID.EAX=80000001H:ECX
let has_lzcnt = settings.add_bool(
"has_lzcnt",
"Has support for LZCNT.",
"LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
false,
);
@ -85,7 +126,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
// Some shared boolean values are used in x86 instruction predicates, so we need to group them
// in the same TargetIsa, for compabitibity with code generated by meta-python.
// in the same TargetIsa, for compatibility with code generated by meta-python.
// TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
// back in the shared SettingGroup, and use it in x86 instruction predicates.
@ -104,21 +145,40 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// Presets corresponding to x86 CPUs.
settings.add_preset("baseline", preset!());
settings.add_preset(
"baseline",
"A baseline preset with no extensions enabled.",
preset!(),
);
let nehalem = settings.add_preset(
"nehalem",
"Nehalem microarchitecture.",
preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
);
let haswell = settings.add_preset(
"haswell",
"Haswell microarchitecture.",
preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
);
let broadwell = settings.add_preset("broadwell", preset!(haswell));
let skylake = settings.add_preset("skylake", preset!(broadwell));
let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
settings.add_preset("icelake", preset!(cannonlake));
let broadwell = settings.add_preset(
"broadwell",
"Broadwell microarchitecture.",
preset!(haswell),
);
let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell));
let cannonlake = settings.add_preset(
"cannonlake",
"Canon Lake microarchitecture.",
preset!(skylake),
);
settings.add_preset(
"icelake",
"Ice Lake microarchitecture.",
preset!(cannonlake),
);
settings.add_preset(
"znver1",
"Zen (first generation) microarchitecture.",
preset!(
has_sse3
&& has_ssse3

View file

@ -164,9 +164,14 @@ impl Immediates {
atomic_rmw_op_values.insert("add", "Add");
atomic_rmw_op_values.insert("sub", "Sub");
atomic_rmw_op_values.insert("and", "And");
atomic_rmw_op_values.insert("nand", "Nand");
atomic_rmw_op_values.insert("or", "Or");
atomic_rmw_op_values.insert("xor", "Xor");
atomic_rmw_op_values.insert("xchg", "Xchg");
atomic_rmw_op_values.insert("umin", "Umin");
atomic_rmw_op_values.insert("umax", "Umax");
atomic_rmw_op_values.insert("smin", "Smin");
atomic_rmw_op_values.insert("smax", "Smax");
new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values)
.with_doc("Atomic Read-Modify-Write Ops")
},
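Cranelift 0.73 widens ir::AtomicRmwOp with the variants added above; their per-location semantics in scalar Rust form:

// Scalar semantics of the added read-modify-write operations.
fn rmw_nand(a: u32, b: u32) -> u32 { !(a & b) }
fn rmw_umin(a: u32, b: u32) -> u32 { a.min(b) } // unsigned minimum
fn rmw_umax(a: u32, b: u32) -> u32 { a.max(b) } // unsigned maximum
fn rmw_smin(a: i32, b: i32) -> i32 { a.min(b) } // signed minimum
fn rmw_smax(a: i32, b: i32) -> i32 { a.max(b) } // signed maximum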

View file

@ -3582,7 +3582,7 @@ pub(crate) fn define(
"fmin_pseudo",
r#"
Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@ -3614,7 +3614,7 @@ pub(crate) fn define(
"fmax_pseudo",
r#"
Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@ -4102,7 +4102,7 @@ pub(crate) fn define(
This will double the lane width and halve the number of lanes. So the resulting
vector has the same number of bits as `x` and `y` do (individually).
See https://github.com/WebAssembly/simd/pull/127 for background info.
See <https://github.com/WebAssembly/simd/pull/127> for background info.
"#,
&formats.binary,
)
@ -4325,6 +4325,26 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"fcvt_low_from_sint",
r#"
Converts packed signed doubleword integers to packed double precision floating point.
Considering only the low half of the register, each lane in `x` is interpreted as a
signed doubleword integer that is then converted to a double precision float. This
instruction differs from fcvt_from_sint in that it converts half the number of lanes
which are converted to occupy twice the number of bits. No rounding should be needed
for the resulting float.
The result type will have half the number of vector lanes as the input.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let WideInt = &TypeVar::new(
"WideInt",
"An integer type with lanes from `i16` upwards",
@ -4491,24 +4511,5 @@ pub(crate) fn define(
.other_side_effects(true),
);
let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
let a = &Operand::new("a", TxN);
ig.push(
Inst::new(
"load_splat",
r#"
Load an element from memory at ``p + Offset`` and return a vector
whose lanes are all set to that element.
This is equivalent to ``load`` followed by ``splat``.
"#,
&formats.load,
)
.operands_in(vec![MemFlags, p, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.build()
}
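A scalar analogue of the new fcvt_low_from_sint documented above: each low-half signed doubleword lane widens to a double, and since every i32 is exactly representable as an f64 (53-bit mantissa), no rounding occurs.

// Per-lane model of fcvt_low_from_sint (low i32x4 half -> f64x2).
fn fcvt_low_lane(x: i32) -> f64 {
    x as f64 // exact conversion
}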

View file

@ -5,29 +5,29 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"regalloc",
r#"Register allocator to use with the MachInst backend.
"Register allocator to use with the MachInst backend.",
r#"
This selects the register allocator as an option among those offered by the `regalloc.rs`
crate. Please report register allocation bugs to the maintainers of this crate whenever
possible.
This selects the register allocator as an option among those offered by the `regalloc.rs`
crate. Please report register allocation bugs to the maintainers of this crate whenever
possible.
Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this
means the x86_64 backend doesn't use this yet.
Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this
means the x86_64 backend doesn't use this yet.
Possible values:
Possible values:
- `backtracking` is a greedy, backtracking register allocator as implemented in
Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but
it should generate better code in general, resulting in better throughput of generated
code.
- `backtracking_checked` is the backtracking allocator with additional self checks that may
take some time to run, and thus these checks are disabled by default.
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
- `backtracking` is a greedy, backtracking register allocator as implemented in
Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but
it should generate better code in general, resulting in better throughput of generated
code.
- `backtracking_checked` is the backtracking allocator with additional self checks that may
take some time to run, and thus these checks are disabled by default.
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
vec![
"backtracking",
"backtracking_checked",
@ -38,24 +38,23 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"opt_level",
"Optimization level for generated code.",
r#"
Optimization level:
Supported levels:
- none: Minimise compile time by disabling most optimizations.
- speed: Generate the fastest possible code
- speed_and_size: like "speed", but also perform transformations
aimed at reducing code size.
- `none`: Minimise compile time by disabling most optimizations.
- `speed`: Generate the fastest possible code
- `speed_and_size`: like "speed", but also perform transformations aimed at reducing code size.
"#,
vec!["none", "speed", "speed_and_size"],
);
settings.add_bool(
"enable_verifier",
"Run the Cranelift IR verifier at strategic times during compilation.",
r#"
Run the Cranelift IR verifier at strategic times during compilation.
This makes compilation slower but catches many bugs. The verifier is always enabled by
default, which is useful during development.
This makes compilation slower but catches many bugs. The verifier is always enabled by
default, which is useful during development.
"#,
true,
);
@ -65,110 +64,110 @@ pub(crate) fn define() -> SettingGroup {
// `colocated` flag on external functions and global values.
settings.add_bool(
"is_pic",
"Enable Position-Independent Code generation",
"Enable Position-Independent Code generation.",
"",
false,
);
settings.add_bool(
"use_colocated_libcalls",
"Use colocated libcalls.",
r#"
Use colocated libcalls.
Generate code that assumes that libcalls can be declared "colocated",
meaning they will be defined along with the current function, such that
they can use more efficient addressing.
"#,
"#,
false,
);
settings.add_bool(
"avoid_div_traps",
"Generate explicit checks around native division instructions to avoid their trapping.",
r#"
Generate explicit checks around native division instructions to avoid
their trapping.
This is primarily used by SpiderMonkey which doesn't install a signal
handler for SIGFPE, but expects a SIGILL trap for division by zero.
On ISAs like ARM where the native division instructions don't trap,
this setting has no effect - explicit checks are always inserted.
"#,
"#,
false,
);
settings.add_bool(
"enable_float",
"Enable the use of floating-point instructions.",
r#"
Enable the use of floating-point instructions
Disabling use of floating-point instructions is not yet implemented.
"#,
"#,
true,
);
settings.add_bool(
"enable_nan_canonicalization",
"Enable NaN canonicalization.",
r#"
Enable NaN canonicalization
This replaces NaNs with a single canonical value, for users requiring
entirely deterministic WebAssembly computation. This is not required
by the WebAssembly spec, so it is not enabled by default.
"#,
"#,
false,
);
settings.add_bool(
"enable_pinned_reg",
r#"Enable the use of the pinned register.
This register is excluded from register allocation, and is completely under the control of
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
with the set_pinned_reg instruction.
"Enable the use of the pinned register.",
r#"
This register is excluded from register allocation, and is completely under the control of
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
with the set_pinned_reg instruction.
"#,
false,
);
settings.add_bool(
"use_pinned_reg_as_heap_base",
r#"Use the pinned register as the heap base.
"Use the pinned register as the heap base.",
r#"
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
base, instead of fetching it from a global value.
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
base, instead of fetching it from a global value.
Warning! Enabling this means that the pinned register *must* be maintained to contain the
heap base address at all times, during the lifetime of a function. Using the pinned
register for other purposes when this is set is very likely to cause crashes.
Warning! Enabling this means that the pinned register *must* be maintained to contain the
heap base address at all times, during the lifetime of a function. Using the pinned
register for other purposes when this is set is very likely to cause crashes.
"#,
false,
);
settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false);
settings.add_bool(
"enable_simd",
"Enable the use of SIMD instructions.",
"",
false,
);
settings.add_bool(
"enable_atomics",
"Enable the use of atomic instructions",
"",
true,
);
settings.add_bool(
"enable_safepoints",
"Enable safepoint instruction insertions.",
r#"
Enable safepoint instruction insertions.
This will allow the emit_stack_maps() function to insert the safepoint
instruction on top of calls and interrupt traps in order to display the
live reference values at that point in the program.
"#,
"#,
false,
);
settings.add_enum(
"tls_model",
r#"
Defines the model used to perform TLS accesses.
"#,
"Defines the model used to perform TLS accesses.",
"",
vec!["none", "elf_gd", "macho", "coff"],
);
@ -176,9 +175,9 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"libcall_call_conv",
"Defines the calling convention to use for LibCalls call expansion.",
r#"
Defines the calling convention to use for LibCalls call expansion,
since it may be different from the ISA default calling convention.
This may be different from the ISA default calling convention.
The default value is to use the same calling convention as the ISA
default calling convention.
@ -192,6 +191,7 @@ pub(crate) fn define() -> SettingGroup {
"cold",
"system_v",
"windows_fastcall",
"apple_aarch64",
"baldrdash_system_v",
"baldrdash_windows",
"baldrdash_2020",
@ -201,9 +201,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_num(
"baldrdash_prologue_words",
"Number of pointer-sized words pushed by the baldrdash prologue.",
r#"
Number of pointer-sized words pushed by the baldrdash prologue.
Functions with the `baldrdash` calling convention don't generate their
own prologue and epilogue. They depend on externally generated code
that pushes a fixed number of words in the prologue and restores them
@ -212,15 +211,46 @@ pub(crate) fn define() -> SettingGroup {
This setting configures the number of pointer-sized words pushed on the
stack when the Cranelift-generated code is entered. This includes the
pushed return address on x86.
"#,
"#,
0,
);
settings.add_bool(
"enable_llvm_abi_extensions",
"Enable various ABI extensions defined by LLVM's behavior.",
r#"
In some cases, LLVM's implementation of an ABI (calling convention)
goes beyond a standard and supports additional argument types or
behavior. This option instructs Cranelift codegen to follow LLVM's
behavior where applicable.
Currently, this applies only to Windows Fastcall on x86-64, and
allows an `i128` argument to be spread across two 64-bit integer
registers. The Fastcall implementation otherwise does not support
`i128` arguments, and will panic if they are present and this
option is not set.
"#,
false,
);
settings.add_bool(
"unwind_info",
"Generate unwind information.",
r#"
This increases metadata size and compile time, but allows for the
debugger to trace frames, is needed for GC tracing that relies on
libunwind (such as in Wasmtime), and is unconditionally needed on
certain platforms (such as Windows) that must always be able to unwind.
"#,
true,
);
// BaldrMonkey requires that not-yet-relocated function addresses be encoded
// as all-ones bitpatterns.
settings.add_bool(
"emit_all_ones_funcaddrs",
"Emit not-yet-relocated function addresses as all-ones bit patterns.",
"",
false,
);
@ -228,32 +258,27 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_probestack",
r#"
Enable the use of stack probes, for calling conventions which support this
functionality.
"#,
"Enable the use of stack probes for supported calling conventions.",
"",
true,
);
settings.add_bool(
"probestack_func_adjusts_sp",
r#"
Set this to true of the stack probe function modifies the stack pointer
itself.
"#,
"Enable if the stack probe adjusts the stack pointer.",
"",
false,
);
settings.add_num(
"probestack_size_log2",
"The log2 of the size of the stack guard region.",
r#"
The log2 of the size of the stack guard region.
Stack frames larger than this size will have stack overflow checked
by calling the probestack function.
The default is 12, which translates to a size of 4096.
"#,
"#,
12,
);
@ -262,6 +287,7 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_jump_tables",
"Enable the use of jump tables in generated machine code.",
"",
true,
);
@ -269,16 +295,15 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_heap_access_spectre_mitigation",
"Enable Spectre mitigation on heap bounds checks.",
r#"
Enable Spectre mitigation on heap bounds checks.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.
This option is enabled by default because it is highly
recommended for secure sandboxing. The embedder should consider
the security implications carefully before disabling this option.
This option is enabled by default because it is highly
recommended for secure sandboxing. The embedder should consider
the security implications carefully before disabling this option.
"#,
true,
);
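
For orientation, a minimal sketch of how an embedder might toggle the settings defined above through cranelift-codegen's public `settings` builder. The setting names come from this file; the builder API (settings::builder(), Configurable::set/enable, Flags::new) is assumed from the crate's documented interface and is not part of this diff.

use cranelift_codegen::settings::{self, Configurable};

fn build_flags() -> settings::Flags {
    let mut b = settings::builder();
    // Enum-valued settings take a value by name, as defined above.
    b.set("opt_level", "speed").unwrap();
    // Boolean settings can be flipped with enable() or set(.., "true").
    b.enable("enable_verifier").unwrap();
    b.set("unwind_info", "true").unwrap();
    settings::Flags::new(b)
}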


@ -1 +1 @@
{"files":{"Cargo.toml":"322ab8efd1588c57313b18aaa231ee30a888741828cf27283e6c62735d73d02d","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"7a8eda4dafcf47100c41e61b5c985f089d1985c500624956dc183fcf6bc7b183"},"package":null}
{"files":{"Cargo.toml":"940852948d4feaabc1a1b694b6901509099d5f464f623928edd24b9cacd0c8dd","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"d9f657a24170255c8136c2b07a2a982f9a4e02f23d425cb07fdf95b76c15825d","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"7a8eda4dafcf47100c41e61b5c985f089d1985c500624956dc183fcf6bc7b183"},"package":null}


@ -1,11 +1,17 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.68.0"
version = "0.73.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
# Since this is a shared dependency of several packages, please strive to keep this dependency-free.
[dependencies]
# Since this is a shared dependency of several packages, please strive to keep this dependency-free
# when no features are enabled.
serde = { version = "1.0.94", features = ["derive"], optional = true }
[features]
enable-serde = ["serde"]


@ -7,6 +7,9 @@
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Common traits of condition codes.
pub trait CondCode: Copy {
/// Get the inverse condition code of `self`.
@ -30,6 +33,7 @@ pub trait CondCode: Copy {
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum IntCC {
/// `==`.
Equal,
@ -187,6 +191,7 @@ impl FromStr for IntCC {
/// comparison. The 14 condition codes here cover every possible combination of the relation above
/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum FloatCC {
/// EQ | LT | GT
Ordered,

(File diff hidden: one or more lines are too long to display.)

third_party/rust/cranelift-codegen/Cargo.toml (vendored)

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.68.0"
version = "0.73.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@ -13,31 +13,31 @@ build = "build.rs"
edition = "2018"
[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.68.0" }
cranelift-entity = { path = "../entity", version = "0.68.0" }
cranelift-bforest = { path = "../bforest", version = "0.68.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.73.0" }
cranelift-entity = { path = "../entity", version = "0.73.0" }
cranelift-bforest = { path = "../bforest", version = "0.73.0" }
hashbrown = { version = "0.9.1", optional = true }
target-lexicon = "0.11"
target-lexicon = "0.12"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
bincode = { version = "1.2.1", optional = true }
gimli = { version = "0.23.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.0.0" }
smallvec = { version = "1.6.1" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
peepmatic = { path = "../peepmatic", optional = true, version = "0.68.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.68.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.68.0" }
regalloc = { git = "https://github.com/mozilla-spidermonkey/regalloc.rs", rev = "fc5d1d33317b0fbd36725757f80a95127eff5109" }
souper-ir = { version = "1", optional = true }
wast = { version = "27.0.0", optional = true }
peepmatic = { path = "../peepmatic", optional = true, version = "0.73.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.73.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.73.0" }
regalloc = { version = "0.0.31" }
souper-ir = { version = "2.1.0", optional = true }
wast = { version = "35.0.0", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accommodated in `tests`.
[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.68.0" }
cranelift-codegen-meta = { path = "meta", version = "0.73.0" }
[features]
default = ["std", "unwind"]
@ -63,9 +63,15 @@ unwind = ["gimli"]
x86 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
arm32 = [] # Work-in-progress codegen backend for ARM.
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []
# Make the old x86 backend the default.
old-x86-backend = []
# Option to enable all architectures.
all-arch = [
"x86",
@ -74,7 +80,12 @@ all-arch = [
]
# For dependent crates that want to serialize some parts of cranelift
enable-serde = ["serde"]
enable-serde = [
"serde",
"regalloc/enable-serde",
"cranelift-entity/enable-serde",
"cranelift-codegen-shared/enable-serde"
]
# Allow snapshotting regalloc test cases. Useful only to report bad register
# allocation failures, or for regalloc.rs developers.

third_party/rust/cranelift-codegen/build.rs (vendored)

@ -17,6 +17,7 @@
use cranelift_codegen_meta as meta;
use std::env;
use std::io::Read;
use std::process;
use std::time::Instant;
@ -97,4 +98,40 @@ fn main() {
)
.unwrap()
}
let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
let mut cmd = std::process::Command::new("git");
cmd.arg("rev-parse")
.arg("HEAD")
.stdout(std::process::Stdio::piped())
.current_dir(env::var("CARGO_MANIFEST_DIR").unwrap());
let version = if let Ok(mut child) = cmd.spawn() {
let mut git_rev = String::new();
child
.stdout
.as_mut()
.unwrap()
.read_to_string(&mut git_rev)
.unwrap();
let status = child.wait().unwrap();
if status.success() {
let git_rev = git_rev.trim().chars().take(9).collect::<String>();
format!("{}-{}", pkg_version, git_rev)
} else {
// not a git repo
pkg_version
}
} else {
// git not available
pkg_version
};
std::fs::write(
std::path::Path::new(&out_dir).join("version.rs"),
format!(
"/// Version number of this crate. \n\
pub const VERSION: &str = \"{}\";",
version
),
)
.unwrap();
}
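
A hedged sketch of the consumer side: the generated version.rs is presumably pulled into the crate with the usual OUT_DIR include pattern (the exact include site in lib.rs is not shown in this diff), producing a constant such as:

// In lib.rs (assumed include pattern, not part of this diff):
include!(concat!(env!("OUT_DIR"), "/version.rs"));

// which expands, for a checkout at the wasmtime rev pinned in this commit,
// to roughly:
/// Version number of this crate.
pub const VERSION: &str = "0.73.0-6b77786a6";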


@ -22,7 +22,7 @@ use crate::legalize_function;
use crate::legalizer::simple_legalize;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::machinst::MachCompileResult;
use crate::machinst::{MachCompileResult, MachStackMap};
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
@ -239,10 +239,23 @@ impl Context {
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps);
if let Some(ref result) = &self.mach_compile_result {
result.buffer.emit(&mut sink);
let info = sink.info;
// New backends do not emit StackMaps through the `CodeSink` because its interface
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
// `result.buffer.stack_maps()`.
for &MachStackMap {
offset_end,
ref stack_map,
..
} in result.buffer.stack_maps()
{
stack_maps.add_stack_map(offset_end, stack_map.clone());
}
info
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
sink.info
}
sink.info
}
/// Creates unwind information for the function.
@ -460,6 +473,7 @@ impl Context {
Ok(build_value_labels_ranges::<ComparableSourceLoc>(
&self.func,
&self.regalloc,
self.mach_compile_result.as_ref(),
isa,
))
}


@ -14,12 +14,22 @@ pub enum AtomicRmwOp {
Sub,
/// And
And,
/// Nand
Nand,
/// Or
Or,
/// Xor
Xor,
/// Exchange
Xchg,
/// Unsigned min
Umin,
/// Unsigned max
Umax,
/// Signed min
Smin,
/// Signed max
Smax,
}
impl Display for AtomicRmwOp {
@ -28,9 +38,14 @@ impl Display for AtomicRmwOp {
AtomicRmwOp::Add => "add",
AtomicRmwOp::Sub => "sub",
AtomicRmwOp::And => "and",
AtomicRmwOp::Nand => "nand",
AtomicRmwOp::Or => "or",
AtomicRmwOp::Xor => "xor",
AtomicRmwOp::Xchg => "xchg",
AtomicRmwOp::Umin => "umin",
AtomicRmwOp::Umax => "umax",
AtomicRmwOp::Smin => "smin",
AtomicRmwOp::Smax => "smax",
};
f.write_str(s)
}
@ -43,9 +58,14 @@ impl FromStr for AtomicRmwOp {
"add" => Ok(AtomicRmwOp::Add),
"sub" => Ok(AtomicRmwOp::Sub),
"and" => Ok(AtomicRmwOp::And),
"nand" => Ok(AtomicRmwOp::Nand),
"or" => Ok(AtomicRmwOp::Or),
"xor" => Ok(AtomicRmwOp::Xor),
"xchg" => Ok(AtomicRmwOp::Xchg),
"umin" => Ok(AtomicRmwOp::Umin),
"umax" => Ok(AtomicRmwOp::Umax),
"smin" => Ok(AtomicRmwOp::Smin),
"smax" => Ok(AtomicRmwOp::Smax),
_ => Err(()),
}
}
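
A minimal sketch exercising the parse/print roundtrip of the new variants (the test wrapper is illustrative, not part of this diff):

#[test]
fn atomic_rmw_op_roundtrip() {
    use core::str::FromStr;
    // "umin" is one of the variants added above.
    let op = AtomicRmwOp::from_str("umin").unwrap();
    assert!(matches!(op, AtomicRmwOp::Umin));
    // Display mirrors FromStr, so the name roundtrips.
    assert_eq!(op.to_string(), "umin");
}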


@ -19,12 +19,16 @@ use core::slice::Iter;
use core::str::{from_utf8, FromStr};
use cranelift_entity::EntityRef;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// This type describes the actual constant data. Note that the bytes stored in this structure are
/// expected to be in little-endian order; this is due to ease-of-use when interacting with
/// WebAssembly values, which are [little-endian by design].
///
/// [little-endian by design]: https://github.com/WebAssembly/design/blob/master/Portability.md
#[derive(Clone, Hash, Eq, PartialEq, Debug, Default)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantData(Vec<u8>);
impl FromIterator<u8> for ConstantData {
@ -173,6 +177,7 @@ pub type ConstantOffset = u32;
/// from the beginning of the function is known (see
/// `relaxation` in `relaxation.rs`).
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantPoolEntry {
data: ConstantData,
offset: Option<ConstantOffset>,
@ -197,6 +202,7 @@ impl ConstantPoolEntry {
/// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and
/// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)).
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ConstantPool {
/// This mapping maintains the insertion order as long as Constants are created with
/// sequentially increasing integers.


@ -21,6 +21,9 @@ use core::mem;
use core::ops::{Index, IndexMut};
use core::u16;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// A data flow graph defines all instructions and basic blocks in a function as well as
/// the data flow dependencies between them. The DFG also tracks values which can be either
/// instruction results or block parameters.
@ -29,6 +32,7 @@ use core::u16;
/// `Layout` data structure which forms the other half of the function representation.
///
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct DataFlowGraph {
/// Data about all of the instructions in the function, including opcodes and operands.
/// The instructions in this map are not in program order. That is tracked by `Layout`, along
@ -416,6 +420,7 @@ impl ValueDef {
/// Internal table storage for extended values.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
enum ValueData {
/// Value is defined by an instruction.
Inst { ty: Type, num: u16, inst: Inst },
@ -935,6 +940,7 @@ impl DataFlowGraph {
/// branches to this block must provide matching arguments, and the arguments to the entry block must
/// match the function arguments.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
struct BlockData {
/// List of parameters to this block.
params: ValueList,


@ -33,6 +33,7 @@ use serde::{Deserialize, Serialize};
///
/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Block(u32);
entity_impl!(Block, "block");
@ -65,6 +66,7 @@ impl Block {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Value(u32);
entity_impl!(Value, "v");
@ -97,6 +99,7 @@ impl Value {
///
/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Inst(u32);
entity_impl!(Inst, "inst");
@ -152,6 +155,7 @@ impl StackSlot {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct GlobalValue(u32);
entity_impl!(GlobalValue, "gv");
@ -177,6 +181,7 @@ impl GlobalValue {
/// While the order is stable, it is arbitrary and does not necessarily resemble the order in which
/// the constants are written in the constant pool.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Constant(u32);
entity_impl!(Constant, "const");
@ -202,6 +207,7 @@ impl Constant {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Immediate(u32);
entity_impl!(Immediate, "imm");
@ -267,6 +273,7 @@ impl JumpTable {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct FuncRef(u32);
entity_impl!(FuncRef, "fn");
@ -298,6 +305,7 @@ impl FuncRef {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct SigRef(u32);
entity_impl!(SigRef, "sig");
@ -323,6 +331,7 @@ impl SigRef {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Heap(u32);
entity_impl!(Heap, "heap");
@ -349,6 +358,7 @@ impl Heap {
///
/// While the order is stable, it is arbitrary.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Table(u32);
entity_impl!(Table, "table");
@ -367,6 +377,7 @@ impl Table {
/// An opaque reference to any of the entities defined in this module that can appear in CLIF IR.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum AnyEntity {
/// The whole function.
Function,


@ -256,8 +256,19 @@ impl fmt::Display for AbiParam {
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
/// On some architectures, small integer function arguments and/or return values are extended to
/// the width of a general-purpose register.
///
/// This attribute specifies how an argument or return value should be extended *if the platform
/// and ABI require it*. Because the frontend (CLIF generator) does not know anything about the
/// particulars of the target's ABI, and the CLIF should be platform-independent, these attributes
/// specify *how* to extend (according to the signedness of the original program) rather than
/// *whether* to extend.
///
/// For example, on x86-64, the SystemV ABI does not require extensions of narrow values, so these
/// `ArgumentExtension` attributes are ignored; but in the Baldrdash (SpiderMonkey) ABI on the same
/// platform, all narrow values *are* extended, so these attributes may lead to extra
/// zero/sign-extend instructions in the generated machine code.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ArgumentExtension {
@ -398,6 +409,7 @@ impl FromStr for ArgumentPurpose {
///
/// Information about a function that can be called directly with a direct `call` instruction.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ExtFuncData {
/// Name of the external function.
pub name: ExternalName,
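
To make the extension semantics above concrete, a hedged sketch of the frontend side: the CLIF producer records *how* a narrow value would be extended, and the target ABI later decides *whether* to act on it (ignored under SystemV on x86-64, honored under Baldrdash). The AbiParam::uext()/sext() builder methods are assumed from the public ir API and are not part of this diff.

use cranelift_codegen::ir::{types, AbiParam, Signature};
use cranelift_codegen::isa::CallConv;

fn narrow_arg_signature() -> Signature {
    let mut sig = Signature::new(CallConv::SystemV);
    // Request zero-extension *if* the target ABI extends narrow arguments.
    sig.params.push(AbiParam::new(types::I8).uext());
    sig.returns.push(AbiParam::new(types::I32));
    sig
}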


@ -9,6 +9,9 @@ use core::cmp;
use core::fmt::{self, Write};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
const TESTCASE_NAME_LENGTH: usize = 16;
/// The name of an external is either a reference to a user-defined symbol
@ -23,6 +26,7 @@ const TESTCASE_NAME_LENGTH: usize = 16;
/// In particular, many `.clif` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ExternalName {
/// A name in a user-defined symbol table. Cranelift does not interpret
/// these numbers in any way.


@ -18,15 +18,63 @@ use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::{EntryRegDiversions, RegDiversions};
use crate::value_label::ValueLabelsRanges;
use crate::write::write_function;
#[cfg(feature = "enable-serde")]
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
/// A function.
#[cfg(feature = "enable-serde")]
use serde::de::{Deserializer, Error};
#[cfg(feature = "enable-serde")]
use serde::ser::Serializer;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// A version marker used to ensure that serialized clif ir is never deserialized with a
/// different version of Cranelift.
#[derive(Copy, Clone, Debug)]
pub struct VersionMarker;
#[cfg(feature = "enable-serde")]
impl Serialize for VersionMarker {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
crate::VERSION.serialize(serializer)
}
}
#[cfg(feature = "enable-serde")]
impl<'de> Deserialize<'de> for VersionMarker {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let version = String::deserialize(deserializer)?;
if version != crate::VERSION {
return Err(D::Error::custom(&format!(
"Expected a clif ir function for version {}, found one for version {}",
crate::VERSION,
version,
)));
}
Ok(VersionMarker)
}
}
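
A hedged sketch of this guard in action, using the optional bincode dependency declared in the Cargo.toml diff above (illustrative usage, not part of this diff): serializing a VersionMarker writes the running crate's VERSION string, and deserializing fails unless it matches.

#[cfg(feature = "enable-serde")]
fn version_marker_roundtrip() {
    let bytes = bincode::serialize(&VersionMarker).unwrap();
    // Same crate, same VERSION: the marker deserializes cleanly.
    let _marker: VersionMarker = bincode::deserialize(&bytes).unwrap();
    // A build with a different crate::VERSION would get Err(..) here instead.
}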
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Function {
/// A version marker used to ensure that serialized clif ir is never deserialized with a
/// different version of Cranelift.
// Note: This must be the first field to ensure that Serde will deserialize it before
// attempting to deserialize other fields that are potentially changed between versions.
pub version_marker: VersionMarker,
/// Name of this function. Mostly used by `.clif` files.
pub name: ExternalName,
@ -109,6 +157,7 @@ impl Function {
/// Create a function with the given name and signature.
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
Self {
version_marker: VersionMarker,
name,
signature: sig,
old_signature: None,


@ -6,8 +6,12 @@ use crate::isa::TargetIsa;
use crate::machinst::RelocDistance;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a global value declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum GlobalValueData {
/// Value is the address of the VM context struct.
VMContext,


@ -4,8 +4,12 @@ use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a heap declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct HeapData {
/// The address of the start of the heap's storage.
pub base: GlobalValue,
@ -26,6 +30,7 @@ pub struct HeapData {
/// Style of heap including style-specific information.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum HeapStyle {
/// A dynamic heap can be relocated to a different base address when it is grown.
Dynamic {


@ -48,6 +48,7 @@ impl IntoBytes for Vec<u8> {
/// An `Imm64` operand can also be used to represent immediate values of smaller integer types by
/// sign-extending to `i64`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Imm64(i64);
impl Imm64 {
@ -148,6 +149,7 @@ impl FromStr for Imm64 {
/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by
/// zero-extending to `i64`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Uimm64(u64);
impl Uimm64 {
@ -279,6 +281,7 @@ pub type Uimm8 = u8;
///
/// This is used to represent sizes of memory objects.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Uimm32(u32);
impl Into<u32> for Uimm32 {
@ -362,6 +365,7 @@ impl From<&[u8]> for V128Imm {
/// This is used to encode an immediate offset for load/store instructions. All supported ISAs have
/// a maximum load/store offset that fits in an `i32`.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Offset32(i32);
impl Offset32 {
@ -451,6 +455,7 @@ impl FromStr for Offset32 {
///
/// All bit patterns are allowed.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
#[repr(C)]
pub struct Ieee32(u32);
@ -459,6 +464,7 @@ pub struct Ieee32(u32);
///
/// All bit patterns are allowed.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
#[repr(C)]
pub struct Ieee64(u64);


@ -13,6 +13,9 @@ use core::num::NonZeroU32;
use core::ops::{Deref, DerefMut};
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
use crate::isa;


@ -8,10 +8,14 @@ use alloc::vec::Vec;
use core::fmt::{self, Display, Formatter};
use core::slice::{Iter, IterMut};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are densely populated.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct JumpTableData {
// Table entries.
table: Vec<Block>,
@ -64,6 +68,11 @@ impl JumpTableData {
pub fn iter_mut(&mut self) -> IterMut<Block> {
self.table.iter_mut()
}
/// Clears all entries in this jump table.
pub fn clear(&mut self) {
self.table.clear();
}
}
impl Display for JumpTableData {


@ -781,6 +781,97 @@ impl<'f> DoubleEndedIterator for Insts<'f> {
}
}
/// A custom serialize and deserialize implementation for [`Layout`].
///
/// This doesn't use a derived implementation as [`Layout`] is a manual implementation of a linked
/// list. Storing it directly as a regular list saves a lot of space.
///
/// The following format is used (notated in EBNF form):
///
/// ```plain
/// data = block_data * ;
/// block_data = "block_id" , "inst_count" , ( "inst_id" * ) ;
/// ```
#[cfg(feature = "enable-serde")]
mod serde {
use ::serde::de::{Deserializer, Error, SeqAccess, Visitor};
use ::serde::ser::{SerializeSeq, Serializer};
use ::serde::{Deserialize, Serialize};
use core::convert::TryFrom;
use core::fmt;
use core::marker::PhantomData;
use super::*;
impl Serialize for Layout {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let size = self.blocks().count() * 2
+ self
.blocks()
.map(|block| self.block_insts(block).count())
.sum::<usize>();
let mut seq = serializer.serialize_seq(Some(size))?;
for block in self.blocks() {
seq.serialize_element(&block)?;
seq.serialize_element(&u32::try_from(self.block_insts(block).count()).unwrap())?;
for inst in self.block_insts(block) {
seq.serialize_element(&inst)?;
}
}
seq.end()
}
}
impl<'de> Deserialize<'de> for Layout {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_seq(LayoutVisitor {
marker: PhantomData,
})
}
}
struct LayoutVisitor {
marker: PhantomData<fn() -> Layout>,
}
impl<'de> Visitor<'de> for LayoutVisitor {
type Value = Layout;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a `cranelift_codegen::ir::Layout`")
}
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: SeqAccess<'de>,
{
let mut layout = Layout::new();
while let Some(block) = access.next_element::<Block>()? {
layout.append_block(block);
let count = access
.next_element::<u32>()?
.ok_or_else(|| Error::missing_field("count"))?;
for _ in 0..count {
let inst = access
.next_element::<Inst>()?
.ok_or_else(|| Error::missing_field("inst"))?;
layout.append_inst(inst, block);
}
}
Ok(layout)
}
}
}
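
As a concrete reading of the EBNF above (an illustration, not part of this diff): a layout with block0 holding inst0 and inst1, followed by block1 holding inst2, serializes as the flat sequence

    block0, 2, inst0, inst1, block1, 1, inst2

where each block id is followed by a u32 instruction count and then that many instruction ids.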
#[cfg(test)]
mod tests {
use super::Layout;


@ -63,6 +63,7 @@ pub enum LibCall {
/// Elf __tls_get_addr
ElfTlsGetAddr,
// When adding a new variant make sure to add it to `all_libcalls` too.
}
impl fmt::Display for LibCall {
@ -136,6 +137,33 @@ impl LibCall {
_ => return None,
})
}
/// Get a list of all known `LibCall`'s.
pub fn all_libcalls() -> &'static [LibCall] {
use LibCall::*;
&[
Probestack,
UdivI64,
SdivI64,
UremI64,
SremI64,
IshlI64,
UshrI64,
SshrI64,
CeilF32,
CeilF64,
FloorF32,
FloorF64,
TruncF32,
TruncF64,
NearestF32,
NearestF64,
Memcpy,
Memset,
Memmove,
ElfTlsGetAddr,
]
}
}
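
A small hedged sketch of why an embedder might want this list, e.g. to iterate every known libcall when wiring up native implementations (the wrapper function is hypothetical; the Display impl is the one defined earlier in this file):

fn dump_libcalls() {
    for lc in LibCall::all_libcalls() {
        // Uses the Display impl defined earlier in this file.
        println!("known libcall: {}", lc);
    }
}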
/// Get a function reference for `libcall` in `func`, following the signature


@ -2,20 +2,40 @@
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
enum FlagBit {
Notrap,
Aligned,
Readonly,
LittleEndian,
BigEndian,
}
const NAMES: [&str; 3] = ["notrap", "aligned", "readonly"];
const NAMES: [&str; 5] = ["notrap", "aligned", "readonly", "little", "big"];
/// Endianness of a memory access.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Endianness {
/// Little-endian
Little,
/// Big-endian
Big,
}
/// Flags for memory operations like load/store.
///
/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program do not change when a flag is removed, but adding a flag may.
///
/// In addition, the flags determine the endianness of the memory access. By default,
/// any memory access uses the native endianness determined by the target ISA. This can
/// be overridden for individual accesses by explicitly specifying little- or big-endian
/// semantics via the flags.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct MemFlags {
bits: u8,
}
@ -48,16 +68,48 @@ impl MemFlags {
/// Set a flag bit by name.
///
/// Returns true if the flag was found and set, false for an unknown flag name.
/// Will also return false when trying to set inconsistent endianness flags.
pub fn set_by_name(&mut self, name: &str) -> bool {
match NAMES.iter().position(|&s| s == name) {
Some(bit) => {
self.bits |= 1 << bit;
true
let bits = self.bits | 1 << bit;
if (bits & (1 << FlagBit::LittleEndian as usize)) != 0
&& (bits & (1 << FlagBit::BigEndian as usize)) != 0
{
false
} else {
self.bits = bits;
true
}
}
None => false,
}
}
/// Return endianness of the memory access. This will return the endianness
/// explicitly specified by the flags if any, and will default to the native
/// endianness otherwise. The native endianness has to be provided by the
/// caller since it is not explicitly encoded in CLIF IR -- this allows a
/// front end to create IR without having to know the target endianness.
pub fn endianness(self, native_endianness: Endianness) -> Endianness {
if self.read(FlagBit::LittleEndian) {
Endianness::Little
} else if self.read(FlagBit::BigEndian) {
Endianness::Big
} else {
native_endianness
}
}
/// Set endianness of the memory access.
pub fn set_endianness(&mut self, endianness: Endianness) {
match endianness {
Endianness::Little => self.set(FlagBit::LittleEndian),
Endianness::Big => self.set(FlagBit::BigEndian),
};
assert!(!(self.read(FlagBit::LittleEndian) && self.read(FlagBit::BigEndian)));
}
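
A minimal sketch of the new endianness API (MemFlags::new() is assumed from the existing interface; everything else is defined above): an explicit flag wins over the native endianness supplied by the backend, and inconsistent flags are rejected.

fn endianness_example() {
    let mut flags = MemFlags::new();
    flags.set_endianness(Endianness::Little);
    // An explicit flag overrides whatever the target's native order is.
    assert_eq!(flags.endianness(Endianness::Big), Endianness::Little);
    // set_by_name refuses to combine "little" and "big" on one access.
    assert!(!flags.set_by_name("big"));
}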
/// Test if the `notrap` flag is set.
///
/// Normally, trapping is part of the semantics of a load/store operation. If the platform


@ -50,7 +50,7 @@ pub use crate::ir::instructions::{
pub use crate::ir::jumptable::JumpTableData;
pub use crate::ir::layout::Layout;
pub use crate::ir::libcall::{get_probestack_funcref, LibCall};
pub use crate::ir::memflags::MemFlags;
pub use crate::ir::memflags::{Endianness, MemFlags};
pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use crate::ir::sourceloc::SourceLoc;
pub use crate::ir::stackslot::{StackLayoutInfo, StackSlotData, StackSlotKind, StackSlots};
@ -58,6 +58,7 @@ pub use crate::ir::table::TableData;
pub use crate::ir::trapcode::TrapCode;
pub use crate::ir::types::Type;
pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc};
pub use crate::value_label::LabelValueLoc;
pub use cranelift_codegen_shared::condcodes;
use crate::binemit;
@ -90,6 +91,7 @@ entity_impl!(ValueLabel, "val");
/// A label of a Value.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct ValueLabelStart {
/// Source location when it is in effect
pub from: SourceLoc,
@ -100,6 +102,7 @@ pub struct ValueLabelStart {
/// Value label assignments: label starts or value aliases.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum ValueLabelAssignments {
/// Original value labels assigned at transform.
Starts(alloc::vec::Vec<ValueLabelStart>),


@ -4,8 +4,12 @@ use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Information about a table declaration.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct TableData {
/// Global value giving the address of the start of the table.
pub base_gv: GlobalValue,


@ -4,15 +4,18 @@ use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::MemFlags;
use crate::ir::Opcode;
use crate::ir::{ExternalName, LibCall};
use crate::isa;
use crate::isa::aarch64::{inst::EmitState, inst::*};
use crate::isa::unwind::UnwindInst;
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.
@ -76,7 +79,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
xreg(BALDRDASH_TLS_REG).to_real_reg(),
ir::types::I64,
param.extension,
@ -85,7 +88,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
xreg(BALDRDASH_SIG_REG).to_real_reg(),
ir::types::I64,
param.extension,
@ -95,7 +98,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -105,7 +108,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -159,6 +162,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@ -167,6 +171,21 @@ impl ABIMachineSpec for AArch64MachineDeps {
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
//
// MacOS aarch64 is slightly different, see also
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
// We are diverging from the MacOS aarch64 implementation in the
// following ways:
// - sign- and zero- extensions of data types less than 32 bits are not
// implemented yet.
// - i128 arguments passing isn't implemented yet in the standard (non
// MacOS) aarch64 ABI.
// - we align the argument stack space to a 16-byte boundary, while
// MacOS requires only 8-byte alignment. In practice this means we're
// slightly overallocating when calling, which is fine, and doesn't
// break our other invariant that the stack is always allocated in
// 16-byte chunks.
let mut next_xreg = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
@ -208,7 +227,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::CalleeTLS => {}
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
@ -220,7 +241,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
"Invalid type for AArch64: {:?}",
param.value_type
);
let rc = Inst::rc_for_type(param.value_type).unwrap();
let (rcs, _) = Inst::rc_for_type(param.value_type).unwrap();
assert!(rcs.len() == 1, "Multi-reg values not supported yet");
let rc = rcs[0];
let next_reg = match rc {
RegClass::I64 => &mut next_xreg,
@ -231,13 +254,23 @@ impl ABIMachineSpec for AArch64MachineDeps {
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(rc == RegClass::I64);
ret.push(param);
} else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
} else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
let reg = match rc {
RegClass::I64 => xreg(*next_reg),
RegClass::V128 => vreg(*next_reg),
_ => unreachable!(),
};
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
@ -246,14 +279,25 @@ impl ABIMachineSpec for AArch64MachineDeps {
*next_reg += 1;
remaining_reg_vals -= 1;
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
// Compute the stack slot's size.
let size = (ty_bits(param.value_type) / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
let size = if call_conv != isa::CallConv::AppleAarch64 {
// Every arg takes a minimum slot of 8 bytes. (16-byte stack
// alignment happens separately after all args.)
std::cmp::max(size, 8)
} else {
// MacOS aarch64 allows stack slots with sizes less than 8
// bytes. They still need to be properly aligned on their
// natural data alignment, though.
size
};
// Align the stack slot.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack = align_to(next_stack, size);
ret.push(ABIArg::stack(
next_stack as i64,
param.value_type,
param.extension,
@ -270,14 +314,14 @@ impl ABIMachineSpec for AArch64MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
xreg(next_xreg).to_real_reg(),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
next_stack as i64,
I64,
ir::ArgumentExtension::None,
@ -290,7 +334,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
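// Editorial aside (hedged): `align_to` is assumed to round its first
// argument up to the given power-of-two boundary, i.e. to behave like
//     fn align_to(x: u64, align: u64) -> u64 { (x + align - 1) & !(align - 1) }
// replacing the open-coded `(next_stack + 15) & !15` from the old line above.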
// To avoid overflow issues, limit the arg/return size to something
// reasonable -- here, 128 MB.
@ -345,7 +389,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::Ret
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let imm = imm as u64;
let mut insts = SmallVec::new();
if let Some(imm12) = Imm12::maybe_from_u64(imm) {
@ -370,7 +414,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::AluRRRExtend {
alu_op: ALUOp::SubS64,
@ -411,7 +455,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
if amount == 0 {
return SmallVec::new();
}
@ -455,8 +499,17 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::Aarch64SetPointerAuth {
return_addresses: false,
},
});
}
// stp fp (x29), lr (x30), [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: fp_reg(),
@ -467,6 +520,15 @@ impl ABIMachineSpec for AArch64MachineDeps {
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
// mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
// the usual encoding (`ORR`) does not work with SP.
insts.push(Inst::AluRRImm12 {
@ -481,20 +543,14 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
// MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
// MOV to SP is an alias of ADD.
insts.push(Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
rn: fp_reg(),
imm12: Imm12 {
bits: 0,
shift12: false,
},
});
// N.B.: sp is already adjusted to the appropriate place by the
// clobber-restore code (which also frees the fixed frame). Hence, there
// is no need for the usual `mov sp, fp` here.
// `ldp fp, lr, [sp], #16`
insts.push(Inst::LoadP64 {
rt: writable_fp_reg(),
rt2: writable_link_reg(),
@ -504,29 +560,56 @@ impl ABIMachineSpec for AArch64MachineDeps {
),
flags: MemFlags::trusted(),
});
insts
}
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
}
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32;
insts.extend(Self::gen_sp_reg_adjust(
-(total_save_bytes + fixed_frame_storage_size as i32),
));
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
if flags.unwind_info() {
// The *unwind* frame (but not the actual frame) starts at the
// clobbers, just below the saved FP/LR pair.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobber_size as u32,
offset_upward_to_caller_sp: 16, // FP, LR
},
});
}
// We use pre-indexed addressing modes here, rather than the possibly
// more efficient "subtract sp once then use fixed offsets" scheme,
// because (i) we cannot necessarily guarantee that the offset of a
// clobber-save slot will be within a SImm7Scaled (+504-byte) offset
// range of the whole frame including other slots, (ii) it is more
// complex to conditionally generate a two-stage SP adjustment (clobbers
// then fixed frame) otherwise, and (iii) generally we just want to
// maintain simplicity here for maintainability. Because clobbers are at the top of the
// frame, just below FP, all that is necessary is to use the pre-indexed
// "push" `[sp, #-16]!` addressing mode.
//
// `clobber_offset` tracks the offset above the start of clobbers for
// unwind-info purposes.
let mut clobber_offset = clobber_size as u32;
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
@ -537,28 +620,56 @@ impl ABIMachineSpec for AArch64MachineDeps {
debug_assert!(r1.get_class() == RegClass::I64);
debug_assert!(r2.get_class() == RegClass::I64);
// stp r1, r2, [sp, #(i * #16)]
// stp r1, r2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: r1,
rt2: r2,
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 8;
if r2 != zero_reg() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r2.to_real_reg(),
},
});
}
clobber_offset -= 8;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r1.to_real_reg(),
},
});
}
}
let vec_offset = int_save_bytes;
for (i, reg) in clobbered_vec.iter().enumerate() {
for reg in clobbered_vec.iter() {
insts.push(Inst::FpuStore128 {
rd: reg.to_reg().to_reg(),
mem: AMode::Unscaled(
stack_reg(),
SImm9::maybe_from_i64((vec_offset + (i * 16)) as i64).unwrap(),
),
mem: AMode::PreIndexed(writable_stack_reg(), SImm9::maybe_from_i64(-16).unwrap()),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 16;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: reg.to_reg(),
},
});
}
}
// Allocate the fixed frame below the clobbers if necessary.
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)));
}
(total_save_bytes as u64, insts)
@ -568,14 +679,25 @@ impl ABIMachineSpec for AArch64MachineDeps {
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
fixed_frame_storage_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
// Free the fixed frame if necessary.
if fixed_frame_storage_size > 0 {
insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
}
for reg in clobbered_vec.iter().rev() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(),
});
}
for reg_pair in clobbered_int.chunks(2).rev() {
let (r1, r2) = if reg_pair.len() == 2 {
(
reg_pair[0].map(|r| r.to_reg()),
@ -588,37 +710,18 @@ impl ABIMachineSpec for AArch64MachineDeps {
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
// ldp r1, r2, [sp, #(i * 16)]
// ldp r1, r2, [sp], #16
insts.push(Inst::LoadP64 {
rt: r1,
rt2: r2,
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
),
flags: MemFlags::trusted(),
});
}
for (i, reg) in clobbered_vec.iter().enumerate() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: AMode::Unscaled(
stack_reg(),
SImm9::maybe_from_i64(((i * 16) + int_save_bytes) as i64).unwrap(),
),
flags: MemFlags::trusted(),
});
}
// For non-baldrdash calling conventions, the frame pointer
// will be moved into the stack pointer in the epilogue, so we
// can skip restoring the stack pointer value with this `add`.
if call_conv.extends_baldrdash() {
let total_save_bytes = (int_save_bytes + vec_save_bytes) as i32;
insts.extend(Self::gen_sp_reg_adjust(total_save_bytes));
}
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
// register. We may have allocated it for something else and clobbered
// it, but the ABI expects us to leave the TLS register unchanged.
@ -700,6 +803,34 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_memcpy(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
size: usize,
) -> SmallVec<[Self::I; 8]> {
// Baldrdash should not use struct args.
assert!(!call_conv.extends_baldrdash());
let mut insts = SmallVec::new();
let arg0 = writable_xreg(0);
let arg1 = writable_xreg(1);
let arg2 = writable_xreg(2);
insts.push(Inst::gen_move(arg0, dst, I64));
insts.push(Inst::gen_move(arg1, src, I64));
insts.extend(Inst::load_constant(arg2, size as u64).into_iter());
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Memcpy),
uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()],
defs: Self::get_regs_clobbered_by_call(call_conv),
opcode: Opcode::Call,
caller_callconv: call_conv,
callee_callconv: call_conv,
}),
});
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
@ -736,6 +867,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
caller_saved
}
fn get_ext_mode(
call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
if call_conv.extends_baldrdash() {
// Baldrdash (SpiderMonkey) always extends args and return values to the full register.
specified
} else {
// No other supported ABI on AArch64 does so.
ir::ArgumentExtension::None
}
}
}
/// Is this type supposed to be seen on this machine? E.g. references of the

View file

@ -3,7 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, MachLabel};
@ -209,6 +209,19 @@ impl AMode {
pub fn label(label: MemLabel) -> AMode {
AMode::Label(label)
}
/// Does the address resolve to just a register value, with no offset or
/// other computation?
pub fn is_reg(&self) -> Option<Reg> {
match self {
&AMode::UnsignedOffset(r, uimm12) if uimm12.value() == 0 => Some(r),
&AMode::Unscaled(r, imm9) if imm9.value() == 0 => Some(r),
&AMode::RegOffset(r, off, _) if off == 0 => Some(r),
&AMode::FPOffset(off, _) if off == 0 => Some(fp_reg()),
&AMode::SPOffset(off, _) if off == 0 => Some(stack_reg()),
_ => None,
}
}
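// Editorial sketch (not part of the patch): this feeds the new
// `Inst::gen_load_addr` helper added later in this patch, which skips
// address materialization for bare-register amodes, e.g.:
//
//     assert_eq!(AMode::FPOffset(0, I64).is_reg(), Some(fp_reg()));
//     assert_eq!(AMode::FPOffset(16, I64).is_reg(), None);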
}
/// A memory argument to a load/store-pair.
@ -588,6 +601,14 @@ impl ScalarSize {
}
}
/// Convert from an integer operand size.
pub fn from_operand_size(size: OperandSize) -> ScalarSize {
match size {
OperandSize::Size32 => ScalarSize::Size32,
OperandSize::Size64 => ScalarSize::Size64,
}
}
/// Convert from a type into the smallest size that fits.
pub fn from_ty(ty: Type) -> ScalarSize {
Self::from_bits(ty_bits(ty))

View file

@ -258,10 +258,6 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
| machreg_to_vec(rt.to_reg())
}
fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
(top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
(top11 << 21)
| (machreg_to_vec(rm) << 16)
@ -313,6 +309,12 @@ fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
| (cond.invert().bits() << 12)
}
fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
0b110_11010100_11111_0000_00_11111_00000
| machreg_to_gpr(rd.to_reg())
| (cond.invert().bits() << 12)
}
fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
0b0_1_1_11010010_00000_0000_10_00000_0_0000
| size.sf_bit() << 31
@ -322,6 +324,29 @@ fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond)
| nzcv.bits()
}
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
match size {
OperandSize::Size64 => {
debug_assert!(immr <= 63);
debug_assert!(imms <= 63);
}
OperandSize::Size32 => {
debug_assert!(immr <= 31);
debug_assert!(imms <= 31);
}
}
debug_assert_eq!(opc & 0b11, opc);
let n_bit = size.sf_bit();
0b0_00_100110_0_000000_000000_00000_00000
| size.sf_bit() << 31
| u32::from(opc) << 29
| n_bit << 22
| u32::from(immr) << 16
| u32::from(imms) << 10
| machreg_to_gpr(rn) << 5
| machreg_to_gpr(rd.to_reg())
}
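// Editorial sketch (not part of the patch): the reworked `Inst::Extend`
// lowering below funnels sign/zero extensions into this BFM family with
// immr = 0 and imms = from_bits - 1; a signed 1-bit extend to 64 bits is
//
//     sink.put4(enc_bfm(0b00, OperandSize::Size64, rd, rn, 0, 0));
//
// i.e. `sbfx x1, x2, #0, #1`, the "41004093" case in the binemit tests.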
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
0b00001110_101_00000_00011_1_00000_00000
| ((is_16b as u32) << 30)
@ -437,6 +462,16 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
| machreg_to_gpr(rt)
}
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
debug_assert_eq!(size & 0b11, size);
0b00_0010001_1_1_00000_1_11111_00000_00000
| size << 30
| machreg_to_gpr(rs.to_reg()) << 16
| machreg_to_gpr(rn) << 5
| machreg_to_gpr(rt)
}
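// Editorial sketch (not part of the patch): the AtomicCAS emit case below
// derives `size` from the type, so an I64 `casal x7, x15, [x27]` becomes
//
//     sink.put4(enc_cas(0b11, rs /* x7 */, rt /* x15 */, rn /* x27 */));
//
// producing the 0xC8E7FF6F word checked as "6FFFE7C8" in the binemit tests.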
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
let abc = (imm >> 5) as u32;
let defgh = (imm & 0b11111) as u32;
@ -517,7 +552,6 @@ impl MachInstEmitInfo for EmitInfo {
impl MachInstEmit for Inst {
type State = EmitState;
type Info = EmitInfo;
type UnwindInfo = super::unwind::AArch64UnwindInfo;
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
// N.B.: we *must* not exceed the "worst-case size" used to compute
@ -1045,6 +1079,9 @@ impl MachInstEmit for Inst {
&Inst::CSet { rd, cond } => {
sink.put4(enc_cset(rd, cond));
}
&Inst::CSetm { rd, cond } => {
sink.put4(enc_csetm(rd, cond));
}
&Inst::CCmpImm {
size,
rn,
@ -1109,6 +1146,11 @@ impl MachInstEmit for Inst {
inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
inst_common::AtomicRmwOp::Nand
| inst_common::AtomicRmwOp::Umin
| inst_common::AtomicRmwOp::Umax
| inst_common::AtomicRmwOp::Smin
| inst_common::AtomicRmwOp::Smax => todo!("{:?}", op),
inst_common::AtomicRmwOp::Xchg => unreachable!(),
};
sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
@ -1132,7 +1174,18 @@ impl MachInstEmit for Inst {
sink.put4(enc_dmb_ish()); // dmb ish
}
&Inst::AtomicCAS { ty } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let size = match ty {
I8 => 0b00,
I16 => 0b01,
I32 => 0b10,
I64 => 0b11,
_ => panic!("Unsupported type: {}", ty),
};
sink.put4(enc_cas(size, rs, rt, rn));
}
&Inst::AtomicCASLoop { ty } => {
/* Emit this:
dmb ish
again:
@ -1264,7 +1317,7 @@ impl MachInstEmit for Inst {
sink.put4(enc_dmb_ish()); // dmb ish
}
&Inst::FpuMove64 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
}
&Inst::FpuMove128 { rd, rn } => {
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
@ -1284,6 +1337,13 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::FpuExtend { rd, rn, size } => {
sink.put4(enc_fpurr(
0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
rd,
rn,
));
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let top22 = match fpu_op {
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
@ -1428,12 +1488,18 @@ impl MachInstEmit for Inst {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b0, 0b11000, enc_size | 0b10)
}
VecMisc2::Cnt => {
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
(0b0, 0b00101, enc_size)
}
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}
&Inst::VecLanes { op, rd, rn, size } => {
let (q, size) = match size {
VectorSize::Size8x8 => (0b0, 0b00),
VectorSize::Size8x16 => (0b1, 0b00),
VectorSize::Size16x4 => (0b0, 0b01),
VectorSize::Size16x8 => (0b1, 0b01),
VectorSize::Size32x4 => (0b1, 0b10),
_ => unreachable!(),
@ -1718,6 +1784,17 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecDupFPImm { rd, imm, size } => {
let imm = imm.enc_bits();
let op = match size.lane_size() {
ScalarSize::Size32 => 0,
ScalarSize::Size64 => 1,
_ => unimplemented!(),
};
let q_op = op | ((size.is_128bits() as u32) << 1);
sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
}
&Inst::VecDupImm {
rd,
imm,
@ -1985,73 +2062,47 @@ impl MachInstEmit for Inst {
&Inst::Extend {
rd,
rn,
signed,
from_bits,
signed: false,
from_bits: 1,
to_bits,
} if from_bits >= 8 => {
let top22 = match (signed, from_bits, to_bits) {
(false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
(false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
(true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32)
(true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
// The 64-bit unsigned variants are the same as the 32-bit ones,
// because writes to Wn zero out the top 32 bits of Xn
(false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
(false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
(true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64)
(true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
// 32-to-64: the unsigned case is a 'mov' (special-cased below).
(false, 32, 64) => 0, // MOV
(true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
_ => panic!(
"Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
signed, from_bits, to_bits
),
};
if top22 != 0 {
sink.put4(enc_extend(top22, rd, rn));
} else {
Inst::mov32(rd, rn).emit(sink, emit_info, state);
}
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && signed => {
assert!(to_bits <= 64);
// Reduce sign-extend-from-1-bit to:
// - and rd, rn, #1
// - sub rd, zr, rd
// We don't have ImmLogic yet, so we just hardcode this. FIXME.
sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
let sub_inst = Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd,
rn: zero_reg(),
rm: rd.to_reg(),
};
sub_inst.emit(sink, emit_info, state);
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && !signed => {
} => {
assert!(to_bits <= 64);
// Reduce zero-extend-from-1-bit to:
// - and rd, rn, #1
// We don't have ImmLogic yet, so we just hardcode this. FIXME.
sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
// Note: This is special cased as UBFX may take more cycles
// than AND on smaller cores.
let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
Inst::AluRRImmLogic {
alu_op: ALUOp::And32,
rd,
rn,
imml,
}
.emit(sink, emit_info, state);
}
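// Editorial note (not part of the patch): `ImmLogic::maybe_from_u64(1, I32)`
// is the logical-immediate encoding of 0x1, so this arm emits
//
//     and wd, wn, #1
//
// ("A3000012" for rd = x3, rn = x5 in the binemit tests) for both 32- and
// 64-bit destinations, since the 32-bit AND already zeroes the upper half
// of the X register.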
&Inst::Extend { .. } => {
panic!("Unsupported extend variant");
&Inst::Extend {
rd,
rn,
signed: false,
from_bits: 32,
to_bits: 64,
} => {
let mov = Inst::Mov32 { rd, rm: rn };
mov.emit(sink, emit_info, state);
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} => {
let (opc, size) = if signed {
(0b00, OperandSize::from_bits(to_bits))
} else {
(0b10, OperandSize::Size32)
};
sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
}
&Inst::Jump { ref dest } => {
let off = sink.cur_offset();
@ -2293,7 +2344,7 @@ impl MachInstEmit for Inst {
add.emit(sink, emit_info, state);
} else if offset == 0 {
if reg != rd.to_reg() {
let mov = Inst::mov(rd, reg);
let mov = Inst::Mov64 { rd, rm: reg };
mov.emit(sink, emit_info, state);
}
@ -2345,6 +2396,13 @@ impl MachInstEmit for Inst {
sink.bind_label(jump_around_label);
}
}
&Inst::ValueLabelMarker { .. } => {
// Nothing; this is only used to compute debug info.
}
&Inst::Unwind { ref inst } => {
sink.add_unwind(inst.clone());
}
}
let end_off = sink.cur_offset();

View file

@ -1846,6 +1846,22 @@ fn test_aarch64_binemit() {
"EFB79F9A",
"cset x15, ge",
));
insns.push((
Inst::CSetm {
rd: writable_xreg(0),
cond: Cond::Eq,
},
"E0139FDA",
"csetm x0, eq",
));
insns.push((
Inst::CSetm {
rd: writable_xreg(16),
cond: Cond::Vs,
},
"F0739FDA",
"csetm x16, vs",
));
insns.push((
Inst::CCmpImm {
size: OperandSize::Size64,
@ -2056,6 +2072,24 @@ fn test_aarch64_binemit() {
"5205084E",
"dup v18.2d, v10.d[0]",
));
insns.push((
Inst::VecDupFPImm {
rd: writable_vreg(31),
imm: ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32).unwrap(),
size: VectorSize::Size32x2,
},
"1FF6030F",
"fmov v31.2s, #1",
));
insns.push((
Inst::VecDupFPImm {
rd: writable_vreg(0),
imm: ASIMDFPModImm::maybe_from_u64(2_f64.to_bits(), ScalarSize::Size64).unwrap(),
size: VectorSize::Size64x2,
},
"00F4006F",
"fmov v0.2d, #2",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(31),
@ -2066,16 +2100,96 @@ fn test_aarch64_binemit() {
"FFE7074F",
"movi v31.16b, #255",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(30),
imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size16).unwrap(),
invert: false,
size: VectorSize::Size16x8,
},
"1E84004F",
"movi v30.8h, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(0),
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size16),
invert: true,
size: VectorSize::Size16x4,
},
"0084002F",
"mvni v0.4h, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(0),
imm: ASIMDMovModImm::maybe_from_u64(256, ScalarSize::Size16).unwrap(),
invert: false,
size: VectorSize::Size16x8,
},
"20A4004F",
"movi v0.8h, #1, LSL #8",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(8),
imm: ASIMDMovModImm::maybe_from_u64(2228223, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x4,
},
"28D4014F",
"movi v8.4s, #33, MSL #16",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(16),
imm: ASIMDMovModImm::maybe_from_u64(35071, ScalarSize::Size32).unwrap(),
invert: true,
size: VectorSize::Size32x2,
},
"10C5042F",
"mvni v16.2s, #136, MSL #8",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(1),
imm: ASIMDMovModImm::maybe_from_u64(0, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x2,
},
"0104000F",
"movi v1.2s, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(24),
imm: ASIMDMovModImm::maybe_from_u64(1107296256, ScalarSize::Size32).unwrap(),
invert: false,
size: VectorSize::Size32x4,
},
"5864024F",
"movi v24.4s, #66, LSL #24",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(8),
imm: ASIMDMovModImm::zero(ScalarSize::Size64),
invert: false,
size: VectorSize::Size64x2,
},
"08E4006F",
"movi v8.2d, #0",
));
insns.push((
Inst::VecDupImm {
rd: writable_vreg(7),
imm: ASIMDMovModImm::maybe_from_u64(18374687574904995840, ScalarSize::Size64).unwrap(),
invert: false,
size: VectorSize::Size64x2,
},
"87E6046F",
"movi v7.2d, #18374687574904995840",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
@ -3678,6 +3792,28 @@ fn test_aarch64_binemit() {
"frintp v12.2d, v17.2d",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cnt,
rd: writable_vreg(23),
rn: vreg(5),
size: VectorSize::Size8x8,
},
"B758200E",
"cnt v23.8b, v5.8b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: writable_vreg(0),
rn: vreg(31),
size: VectorSize::Size8x8,
},
"E0AB312E",
"uminv b0, v31.8b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
@ -3722,6 +3858,17 @@ fn test_aarch64_binemit() {
"addv b2, v29.16b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
rd: writable_vreg(15),
rn: vreg(7),
size: VectorSize::Size16x4,
},
"EFB8710E",
"addv h15, v7.4h",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Addv,
@ -3952,6 +4099,50 @@ fn test_aarch64_binemit() {
"vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
));
insns.push((
Inst::Extend {
rd: writable_xreg(3),
rn: xreg(5),
signed: false,
from_bits: 1,
to_bits: 32,
},
"A3000012",
"and w3, w5, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(3),
rn: xreg(5),
signed: false,
from_bits: 1,
to_bits: 64,
},
"A3000012",
"and w3, w5, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(10),
rn: xreg(21),
signed: true,
from_bits: 1,
to_bits: 32,
},
"AA020013",
"sbfx w10, w21, #0, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),
rn: xreg(2),
signed: true,
from_bits: 1,
to_bits: 64,
},
"41004093",
"sbfx x1, x2, #0, #1",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),
@ -4005,7 +4196,7 @@ fn test_aarch64_binemit() {
to_bits: 64,
},
"411C0053",
"uxtb x1, w2",
"uxtb w1, w2",
));
insns.push((
Inst::Extend {
@ -4027,7 +4218,7 @@ fn test_aarch64_binemit() {
to_bits: 64,
},
"413C0053",
"uxth x1, w2",
"uxth w1, w2",
));
insns.push((
Inst::Extend {
@ -4281,8 +4472,8 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(4),
},
"881CA40E",
"mov v8.8b, v4.8b",
"8840601E",
"fmov d8, d4",
));
insns.push((
@ -4316,6 +4507,16 @@ fn test_aarch64_binemit() {
"mov d23, v11.d[0]",
));
insns.push((
Inst::FpuExtend {
rd: writable_vreg(31),
rn: vreg(0),
size: ScalarSize::Size32,
},
"1F40201E",
"fmov s31, s0",
));
insns.push((
Inst::FpuRR {
fpu_op: FPUOp1::Abs32,
@ -5034,9 +5235,48 @@ fn test_aarch64_binemit() {
"BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
"atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(28),
rt: xreg(20),
rn: xreg(10),
ty: I8,
},
"54FDFC08",
"casalb w28, w20, [x10]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(2),
rt: xreg(19),
rn: xreg(23),
ty: I16,
},
"F3FEE248",
"casalh w2, w19, [x23]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(0),
rt: zero_reg(),
rn: stack_reg(),
ty: I32,
},
"FFFFE088",
"casal w0, wzr, [sp]",
));
insns.push((
Inst::AtomicCAS {
rs: writable_xreg(7),
rt: xreg(15),
rn: xreg(27),
ty: I64,
},
"6FFFE7C8",
"casal x7, x15, [x27]",
));
insns.push((
Inst::AtomicCASLoop {
ty: I8,
},
"BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
@ -5044,7 +5284,7 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::AtomicCAS {
Inst::AtomicCASLoop {
ty: I64,
},
"BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",

View file

@ -668,39 +668,208 @@ impl MoveWideConst {
}
/// Advanced SIMD modified immediate as used by MOVI/MVNI.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDMovModImm {
imm: u8,
shift: u8,
is_64bit: bool,
shift_ones: bool,
}
impl ASIMDMovModImm {
/// Construct an ASIMDMovModImm from an arbitrary 64-bit constant, if possible.
/// Note that the bits in `value` outside of the range specified by `size` are
/// ignored; for example, in the case of `ScalarSize::Size8` all bits above the
/// lowest 8 are ignored.
pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
match size {
ScalarSize::Size8 => Some(ASIMDMovModImm {
imm: value as u8,
shift: 0,
is_64bit: false,
shift_ones: false,
}),
ScalarSize::Size16 => {
let value = value as u16;
if value >> 8 == 0 {
Some(ASIMDMovModImm {
imm: value as u8,
shift: 0,
is_64bit: false,
shift_ones: false,
})
} else if value as u8 == 0 {
Some(ASIMDMovModImm {
imm: (value >> 8) as u8,
shift: 8,
is_64bit: false,
shift_ones: false,
})
} else {
None
}
}
ScalarSize::Size32 => {
let value = value as u32;
// Value is of the form 0x00MMFFFF.
if value & 0xFF00FFFF == 0x0000FFFF {
let imm = (value >> 16) as u8;
Some(ASIMDMovModImm {
imm,
shift: 16,
is_64bit: false,
shift_ones: true,
})
// Value is of the form 0x0000MMFF.
} else if value & 0xFFFF00FF == 0x000000FF {
let imm = (value >> 8) as u8;
Some(ASIMDMovModImm {
imm,
shift: 8,
is_64bit: false,
shift_ones: true,
})
} else {
// Of the 4 bytes, at most one is non-zero.
for shift in (0..32).step_by(8) {
if value & (0xFF << shift) == value {
return Some(ASIMDMovModImm {
imm: (value >> shift) as u8,
shift,
is_64bit: false,
shift_ones: false,
});
}
}
None
}
}
ScalarSize::Size64 => {
let mut imm = 0u8;
// Check if all bytes are either 0 or 0xFF.
for i in 0..8 {
let b = (value >> (i * 8)) as u8;
if b == 0 || b == 0xFF {
imm |= (b & 1) << i;
} else {
return None;
}
}
Some(ASIMDMovModImm {
imm,
shift: 0,
is_64bit: true,
shift_ones: false,
})
}
_ => None,
}
}
/// Create a zero immediate of this format.
pub fn zero() -> Self {
pub fn zero(size: ScalarSize) -> Self {
ASIMDMovModImm {
imm: 0,
shift: 0,
is_64bit: size == ScalarSize::Size64,
shift_ones: false,
}
}
/// Returns the value that this immediate represents.
pub fn value(&self) -> (u8, u32, bool) {
(self.imm, self.shift as u32, self.shift_ones)
}
}
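// Editorial sketch (not part of the patch): in the Size64 form each bit of
// `imm` selects an all-zeros or all-ones byte, so
//
//     ASIMDMovModImm::maybe_from_u64(0xFF00_00FF_00FF_0000, ScalarSize::Size64)
//
// yields imm = 0b1001_0100, the "movi v7.2d, #18374687574904995840" case in
// the binemit tests earlier in this patch.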
/// Advanced SIMD modified immediate as used by the vector variant of FMOV.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDFPModImm {
imm: u8,
is_64bit: bool,
}
impl ASIMDFPModImm {
/// Construct an ASIMDFPModImm from an arbitrary 64-bit constant, if possible.
pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDFPModImm> {
// In all cases immediates are encoded as an 8-bit number 0b_abcdefgh;
// let `B` be the inverse of the digit `b`.
match size {
ScalarSize::Size32 => {
// In this case the representable immediates are 32-bit numbers of the form
// 0b_aBbb_bbbc_defg_h000 shifted to the left by 16.
let value = value as u32;
let b0_5 = (value >> 19) & 0b111111;
let b6 = (value >> 19) & (1 << 6);
let b7 = (value >> 24) & (1 << 7);
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value32(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: false,
})
} else {
None
}
}
ScalarSize::Size64 => {
// In this case the representable immediates are 64-bit numbers of the form
// 0b_aBbb_bbbb_bbcd_efgh shifted to the left by 48.
let b0_5 = (value >> 48) & 0b111111;
let b6 = (value >> 48) & (1 << 6);
let b7 = (value >> 56) & (1 << 7);
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value64(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: true,
})
} else {
None
}
}
_ => None,
}
}
/// Returns bits ready for encoding.
pub fn enc_bits(&self) -> u8 {
self.imm
}
/// Returns the 32-bit value that corresponds to an 8-bit encoding.
fn value32(imm: u8) -> u32 {
let imm = imm as u32;
let b0_5 = imm & 0b111111;
let b6 = (imm >> 6) & 1;
let b6_inv = b6 ^ 1;
let b7 = (imm >> 7) & 1;
b0_5 << 19 | (b6 * 0b11111) << 25 | b6_inv << 30 | b7 << 31
}
/// Returns the 64-bit value that corresponds to an 8-bit encoding.
fn value64(imm: u8) -> u64 {
let imm = imm as u64;
let b0_5 = imm & 0b111111;
let b6 = (imm >> 6) & 1;
let b6_inv = b6 ^ 1;
let b7 = (imm >> 7) & 1;
b0_5 << 48 | (b6 * 0b11111111) << 54 | b6_inv << 62 | b7 << 63
}
}
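// Editorial sketch (not part of the patch): the round-trip through
// `value32`/`value64` is what guarantees exactness. For 1.0f32 (0x3F80_0000)
// the fields are a = 0, b = 1, cdefgh = 0b110000, so
//
//     ASIMDFPModImm::maybe_from_u64(1_f32.to_bits() as u64, ScalarSize::Size32)
//
// yields Some(imm) with enc_bits() == 0b0111_0000, while a value such as
// 0.013671875_f32 does not fit the form and returns None (see the tests
// below).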
impl PrettyPrint for NZCV {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
@ -782,7 +951,20 @@ impl PrettyPrint for MoveWideConst {
impl PrettyPrint for ASIMDMovModImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.shift == 0 {
if self.is_64bit {
debug_assert_eq!(self.shift, 0);
let enc_imm = self.imm as i8;
let mut imm = 0u64;
for i in 0..8 {
let b = (enc_imm >> i) & 1;
imm |= (-b as u8 as u64) << (i * 8);
}
format!("#{}", imm)
} else if self.shift == 0 {
format!("#{}", self.imm)
} else {
let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
@ -791,6 +973,16 @@ impl PrettyPrint for ASIMDMovModImm {
}
}
impl PrettyPrint for ASIMDFPModImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.is_64bit {
format!("#{}", f64::from_bits(Self::value64(self.imm)))
} else {
format!("#{}", f32::from_bits(Self::value32(self.imm)))
}
}
}
#[cfg(test)]
mod test {
use super::*;
@ -1022,4 +1214,44 @@ mod test {
unreachable!();
}
}
#[test]
fn asimd_fp_mod_imm_test() {
assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size32));
assert_eq!(
None,
ASIMDFPModImm::maybe_from_u64(0.013671875_f32.to_bits() as u64, ScalarSize::Size32)
);
assert_eq!(None, ASIMDFPModImm::maybe_from_u64(0, ScalarSize::Size64));
assert_eq!(
None,
ASIMDFPModImm::maybe_from_u64(10000_f64.to_bits(), ScalarSize::Size64)
);
}
#[test]
fn asimd_mov_mod_imm_test() {
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(513, ScalarSize::Size16)
);
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(4278190335, ScalarSize::Size32)
);
assert_eq!(
None,
ASIMDMovModImm::maybe_from_u64(8388608, ScalarSize::Size64)
);
assert_eq!(
Some(ASIMDMovModImm {
imm: 66,
shift: 16,
is_64bit: false,
shift_ones: true,
}),
ASIMDMovModImm::maybe_from_u64(4390911, ScalarSize::Size32)
);
}
}

View file

@ -5,10 +5,10 @@
use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
};
use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type, ValueLabel};
use crate::isa::unwind::UnwindInst;
use crate::isa::CallConv;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
@ -332,6 +332,8 @@ pub enum VecMisc2 {
Frintm,
/// Floating point round to integral, rounding towards plus infinity
Frintp,
/// Population count per byte
Cnt,
}
/// A Vector narrowing operation with two registers.
@ -660,6 +662,12 @@ pub enum Inst {
cond: Cond,
},
/// A conditional-set-mask operation.
CSetm {
rd: Writable<Reg>,
cond: Cond,
},
/// A conditional comparison with an immediate.
CCmpImm {
size: OperandSize,
@ -688,19 +696,26 @@ pub enum Inst {
op: inst_common::AtomicRmwOp,
},
/// An atomic compare-and-swap operation. This instruction is sequentially consistent.
AtomicCAS {
rs: Writable<Reg>,
rt: Reg,
rn: Reg,
ty: Type,
},
/// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
/// store-conditional loop. (Although we could possibly implement it more directly using
/// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is
/// both preceded and followed by a fence which is at least as comprehensive as that of the
/// `Fence` instruction below. This instruction is sequentially consistent. Note that the
/// operand conventions, although very similar to AtomicRMW, are different:
/// store-conditional loop. The sequence is both preceded and followed by a fence which is
/// at least as comprehensive as that of the `Fence` instruction below. This instruction
/// is sequentially consistent. Note that the operand conventions, although very similar
/// to AtomicRMW, are different:
///
/// x25 (rd) address
/// x26 (rd) expected value
/// x28 (rd) replacement value
/// x27 (wr) old value
/// x24 (wr) scratch reg; value afterwards has no meaning
AtomicCAS {
AtomicCASLoop {
ty: Type, // I8, I16, I32 or I64
},
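// Editorial sketch (not part of the patch): per the emit code in this
// patch the loop expands to roughly the following (the expected value in
// x26 is copied, and masked for I8/I16, into the x24 scratch before the
// compare):
//
//     dmb ish
//   again:
//     ldxr{b,h,} x27, [x25]
//     cmp x27, x24
//     b.ne out
//     stxr{b,h,} w24, x28, [x25]
//     cbnz w24, again
//   out:
//     dmb ish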
@ -748,6 +763,13 @@ pub enum Inst {
size: VectorSize,
},
/// Zero-extend a SIMD & FP scalar to the full width of a vector register.
FpuExtend {
rd: Writable<Reg>,
rn: Reg,
size: ScalarSize,
},
/// 1-op FPU instruction.
FpuRR {
fpu_op: FPUOp1,
@ -921,6 +943,13 @@ pub enum Inst {
size: VectorSize,
},
/// Duplicate FP immediate to vector.
VecDupFPImm {
rd: Writable<Reg>,
imm: ASIMDFPModImm,
size: VectorSize,
},
/// Duplicate immediate to vector.
VecDupImm {
rd: Writable<Reg>,
@ -1189,6 +1218,17 @@ pub enum Inst {
/// The needed space before the next deadline.
needed_space: CodeOffset,
},
/// A definition of a value label.
ValueLabelMarker {
reg: Reg,
label: ValueLabel,
},
/// An unwind pseudo-instruction.
Unwind {
inst: UnwindInst,
},
}
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
@ -1211,35 +1251,6 @@ fn inst_size_test() {
}
impl Inst {
/// Create a move instruction.
pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
assert!(to_reg.to_reg().get_class() == from_reg.get_class());
if from_reg.get_class() == RegClass::I64 {
Inst::Mov64 {
rd: to_reg,
rm: from_reg,
}
} else if from_reg.get_class() == RegClass::V128 {
Inst::FpuMove128 {
rd: to_reg,
rn: from_reg,
}
} else {
Inst::FpuMove64 {
rd: to_reg,
rn: from_reg,
}
}
}
/// Create a 32-bit move instruction.
pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
Inst::Mov32 {
rd: to_reg,
rm: from_reg,
}
}
/// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
/// logical immediate, or constant pool).
pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
@ -1312,22 +1323,25 @@ impl Inst {
}
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if value == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size8x8
size: VectorSize::Size32x2
}]
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(RegClass::I64, I32);
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);
insts.push(Inst::MovToFpu {
@ -1341,18 +1355,21 @@ impl Inst {
}
/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);
insts.push(Inst::MovToFpu {
@ -1368,7 +1385,7 @@ impl Inst {
}
/// Create instructions that load a 128-bit vector constant.
pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_fp_constant128<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u128,
alloc_tmp: F,
@ -1416,15 +1433,24 @@ impl Inst {
r
}
/// Create instructions that load a 128-bit vector constant consisting of elements with
/// Create instructions that load a vector constant consisting of elements with
/// the same value.
pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
pub fn load_replicated_vector_pattern<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
pattern: u64,
size: VectorSize,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 5]> {
let lane_size = size.lane_size();
let widen_32_bit_pattern = |pattern, lane_size| {
if lane_size == ScalarSize::Size32 {
let pattern = pattern as u32 as u64;
ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64)
} else {
None
}
};
if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupImm {
@ -1443,8 +1469,29 @@ impl Inst {
invert: true,
size
}]
} else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) {
let mut insts = smallvec![Inst::VecDupImm {
rd,
imm,
invert: false,
size: VectorSize::Size64x2,
}];
// TODO: Implement support for 64-bit scalar MOVI; we zero-extend the
// lower 64 bits instead.
if !size.is_128bits() {
insts.push(Inst::FpuExtend {
rd,
rn: rd.to_reg(),
size: ScalarSize::Size64,
});
}
insts
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) {
smallvec![Inst::VecDupFPImm { rd, imm, size }]
} else {
let tmp = alloc_tmp(RegClass::I64, I64);
let tmp = alloc_tmp(I64);
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
insts.push(Inst::VecDup {
@ -1558,6 +1605,17 @@ impl Inst {
}
}
}
/// Generate a LoadAddr instruction (load address of an amode into
/// register). Elides when possible (when amode is just a register). Returns
/// destination register: either `rd` or a register directly from the amode.
pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
if let Some(r) = mem.is_reg() {
(r, None)
} else {
(rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
}
}
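// Editorial sketch (not part of the patch): a typical caller, assuming a
// scratch register `tmp` and some amode `mem`:
//
//     let (addr, inst) = Inst::gen_load_addr(tmp, mem);
//     if let Some(inst) = inst {
//         inst.emit(sink, emit_info, state);
//     }
//     // `addr` is `tmp` only when materialization was actually needed.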
}
//=============================================================================
@ -1691,7 +1749,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_use(rn);
collector.add_use(rm);
}
&Inst::CSet { rd, .. } => {
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
collector.add_def(rd);
}
&Inst::CCmpImm { rn, .. } => {
@ -1704,7 +1762,12 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(writable_xreg(27));
collector.add_def(writable_xreg(28));
}
&Inst::AtomicCAS { .. } => {
&Inst::AtomicCAS { rs, rt, rn, .. } => {
collector.add_mod(rs);
collector.add_use(rt);
collector.add_use(rn);
}
&Inst::AtomicCASLoop { .. } => {
collector.add_use(xreg(25));
collector.add_use(xreg(26));
collector.add_use(xreg(28));
@ -1732,6 +1795,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuExtend { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuRR { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
@ -1881,6 +1948,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecDupFPImm { rd, .. } => {
collector.add_def(rd);
}
&Inst::VecDupImm { rd, .. } => {
collector.add_def(rd);
}
@ -1971,6 +2041,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
memarg_regs(mem, collector);
}
&Inst::VirtualSPOffsetAdj { .. } => {}
&Inst::ValueLabelMarker { reg, .. } => {
collector.add_use(reg);
}
&Inst::Unwind { .. } => {}
&Inst::EmitIsland { .. } => {}
}
}
@ -2259,7 +2333,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, rn);
map_use(mapper, rm);
}
&mut Inst::CSet { ref mut rd, .. } => {
&mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => {
map_def(mapper, rd);
}
&mut Inst::CCmpImm { ref mut rn, .. } => {
@ -2268,7 +2342,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
&mut Inst::AtomicRMW { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicCAS { .. } => {
&mut Inst::AtomicCAS {
ref mut rs,
ref mut rt,
ref mut rn,
..
} => {
map_mod(mapper, rs);
map_use(mapper, rt);
map_use(mapper, rn);
}
&mut Inst::AtomicCASLoop { .. } => {
// There are no vregs to map in this insn.
}
&mut Inst::AtomicLoad {
@ -2310,6 +2394,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::FpuExtend {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::FpuRR {
ref mut rd,
ref mut rn,
@ -2593,6 +2685,9 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecDupFPImm { ref mut rd, .. } => {
map_def(mapper, rd);
}
&mut Inst::VecDupImm { ref mut rd, .. } => {
map_def(mapper, rd);
}
@ -2710,6 +2805,10 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
}
&mut Inst::VirtualSPOffsetAdj { .. } => {}
&mut Inst::EmitIsland { .. } => {}
&mut Inst::ValueLabelMarker { ref mut reg, .. } => {
map_use(mapper, reg);
}
&mut Inst::Unwind { .. } => {}
}
}
@ -2778,16 +2877,43 @@ impl MachInst for Inst {
}
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
assert!(ty.bits() <= 128);
Inst::mov(to_reg, from_reg)
let bits = ty.bits();
assert!(bits <= 128);
assert!(to_reg.to_reg().get_class() == from_reg.get_class());
if from_reg.get_class() == RegClass::I64 {
Inst::Mov64 {
rd: to_reg,
rm: from_reg,
}
} else if from_reg.get_class() == RegClass::V128 {
if bits > 64 {
Inst::FpuMove128 {
rd: to_reg,
rn: from_reg,
}
} else {
Inst::FpuMove64 {
rd: to_reg,
rn: from_reg,
}
}
} else {
panic!("Unexpected register class: {:?}", from_reg.get_class());
}
}
fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
to_reg: Writable<Reg>,
value: u64,
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
to_regs: ValueRegs<Writable<Reg>>,
value: u128,
ty: Type,
alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported yet");
let value = value as u64;
if ty == F64 {
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
} else if ty == F32 {
@ -2811,11 +2937,10 @@ impl MachInst for Inst {
}
}
fn gen_zero_len_nop() -> Inst {
Inst::Nop0
}
fn gen_nop(preferred_size: usize) -> Inst {
if preferred_size == 0 {
return Inst::Nop0;
}
// We can't give a NOP (or any insn) < 4 bytes.
assert!(preferred_size >= 4);
Inst::Nop4
@ -2825,14 +2950,28 @@ impl MachInst for Inst {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
Ok(RegClass::V128)
I8 => Ok((&[RegClass::I64], &[I8])),
I16 => Ok((&[RegClass::I64], &[I16])),
I32 => Ok((&[RegClass::I64], &[I32])),
I64 => Ok((&[RegClass::I64], &[I64])),
B1 => Ok((&[RegClass::I64], &[B1])),
B8 => Ok((&[RegClass::I64], &[B8])),
B16 => Ok((&[RegClass::I64], &[B16])),
B32 => Ok((&[RegClass::I64], &[B32])),
B64 => Ok((&[RegClass::I64], &[B64])),
R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
R64 => Ok((&[RegClass::I64], &[R64])),
F32 => Ok((&[RegClass::V128], &[F32])),
F64 => Ok((&[RegClass::V128], &[F64])),
I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])),
B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])),
_ if ty.is_vector() => {
assert!(ty.bits() <= 128);
Ok((&[RegClass::V128], &[I8X16]))
}
IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
@ -2864,6 +3003,17 @@ impl MachInst for Inst {
fn ref_type_regclass(_: &settings::Flags) -> RegClass {
RegClass::I64
}
fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
Inst::ValueLabelMarker { label, reg }
}
fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
match self {
Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
_ => None,
}
}
}
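// Editorial sketch (not part of the patch): the marker round-trips through
// the two new hooks, which is all the debug-info pass needs:
//
//     let inst = Inst::gen_value_label_marker(label, reg);
//     assert_eq!(inst.defines_value_label(), Some((label, reg)));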
//=============================================================================
@ -3039,41 +3189,13 @@ impl Inst {
let rn = show_ireg_sized(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::ULoad8 {
rd,
ref mem,
..
}
| &Inst::SLoad8 {
rd,
ref mem,
..
}
| &Inst::ULoad16 {
rd,
ref mem,
..
}
| &Inst::SLoad16 {
rd,
ref mem,
..
}
| &Inst::ULoad32 {
rd,
ref mem,
..
}
| &Inst::SLoad32 {
rd,
ref mem,
..
}
| &Inst::ULoad64 {
rd,
ref mem,
..
} => {
&Inst::ULoad8 { rd, ref mem, .. }
| &Inst::SLoad8 { rd, ref mem, .. }
| &Inst::ULoad16 { rd, ref mem, .. }
| &Inst::SLoad16 { rd, ref mem, .. }
| &Inst::ULoad32 { rd, ref mem, .. }
| &Inst::SLoad32 { rd, ref mem, .. }
| &Inst::ULoad64 { rd, ref mem, .. } => {
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
let is_unscaled = match &mem {
@ -3101,26 +3223,10 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}", mem_str, op, rd, mem)
}
&Inst::Store8 {
rd,
ref mem,
..
}
| &Inst::Store16 {
rd,
ref mem,
..
}
| &Inst::Store32 {
rd,
ref mem,
..
}
| &Inst::Store64 {
rd,
ref mem,
..
} => {
&Inst::Store8 { rd, ref mem, .. }
| &Inst::Store16 { rd, ref mem, .. }
| &Inst::Store32 { rd, ref mem, .. }
| &Inst::Store64 { rd, ref mem, .. } => {
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
let is_unscaled = match &mem {
@ -3142,13 +3248,17 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}{} {}, {}", mem_str, op, rd, mem)
}
&Inst::StoreP64 { rt, rt2, ref mem, .. } => {
&Inst::StoreP64 {
rt, rt2, ref mem, ..
} => {
let rt = rt.show_rru(mb_rru);
let rt2 = rt2.show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadP64 { rt, rt2, ref mem, .. } => {
&Inst::LoadP64 {
rt, rt2, ref mem, ..
} => {
let rt = rt.to_reg().show_rru(mb_rru);
let rt2 = rt2.to_reg().show_rru(mb_rru);
let mem = mem.show_rru(mb_rru);
@ -3191,6 +3301,11 @@ impl Inst {
let cond = cond.show_rru(mb_rru);
format!("cset {}, {}", rd, cond)
}
&Inst::CSetm { rd, cond } => {
let rd = rd.to_reg().show_rru(mb_rru);
let cond = cond.show_rru(mb_rru);
format!("csetm {}, {}", rd, cond)
}
&Inst::CCmpImm {
size,
rn,
@ -3209,27 +3324,52 @@ impl Inst {
"atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
ty.bits(), op)
}
&Inst::AtomicCAS { ty, .. } => {
&Inst::AtomicCAS { rs, rt, rn, ty } => {
let op = match ty {
I8 => "casalb",
I16 => "casalh",
I32 | I64 => "casal",
_ => panic!("Unsupported type: {}", ty),
};
let size = OperandSize::from_ty(ty);
let rs = show_ireg_sized(rs.to_reg(), mb_rru, size);
let rt = show_ireg_sized(rt, mb_rru, size);
let rn = rn.show_rru(mb_rru);
format!("{} {}, {}, [{}]", op, rs, rt, rn)
}
&Inst::AtomicCASLoop { ty } => {
format!(
"atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
ty.bits())
}
&Inst::AtomicLoad { ty, r_data, r_addr, .. } => {
&Inst::AtomicLoad {
ty, r_data, r_addr, ..
} => {
format!(
"atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru))
r_data.show_rru(mb_rru),
ty.bits(),
r_addr.show_rru(mb_rru)
)
}
&Inst::AtomicStore { ty, r_data, r_addr, .. } => {
&Inst::AtomicStore {
ty, r_data, r_addr, ..
} => {
format!(
"atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru))
"atomically {{ {}_bits_at[{}] = {} }}",
ty.bits(),
r_addr.show_rru(mb_rru),
r_data.show_rru(mb_rru)
)
}
&Inst::Fence {} => {
format!("dmb ish")
}
&Inst::FpuMove64 { rd, rn } => {
let rd = rd.to_reg().show_rru(mb_rru);
let rn = rn.show_rru(mb_rru);
format!("mov {}.8b, {}.8b", rd, rn)
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMove128 { rd, rn } => {
let rd = rd.to_reg().show_rru(mb_rru);
@ -3241,6 +3381,12 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::FpuExtend { rd, rn, size } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
let rn = show_vreg_scalar(rn, mb_rru, size);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let (op, sizesrc, sizedest) = match fpu_op {
FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
@ -3364,7 +3510,11 @@ impl Inst {
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
format!(
"ldr {}, pc+8 ; b 12 ; data.f64 {}",
rd,
f64::from_bits(const_data)
)
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
@ -3473,31 +3623,67 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, 0, size);
format!("dup {}, {}", rd, rn)
}
&Inst::VecDupImm { rd, imm, invert, size } => {
&Inst::VecDupFPImm { rd, imm, size } => {
let imm = imm.show_rru(mb_rru);
let op = if invert {
"mvni"
} else {
"movi"
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
format!("fmov {}, {}", rd, imm)
}
&Inst::VecDupImm {
rd,
imm,
invert,
size,
} => {
let imm = imm.show_rru(mb_rru);
let op = if invert { "mvni" } else { "movi" };
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
format!("{} {}, {}", op, rd, imm)
}
&Inst::VecExtend { t, rd, rn, high_half } => {
&Inst::VecExtend {
t,
rd,
rn,
high_half,
} => {
let (op, dest, src) = match (t, high_half) {
(VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
(VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
(VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
(VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
(VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
(VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
(VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecExtendOp::Sxtl8, false) => {
("sxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Sxtl8, true) => {
("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Sxtl16, false) => {
("sxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Sxtl16, true) => {
("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Sxtl32, false) => {
("sxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Sxtl32, true) => {
("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
(VecExtendOp::Uxtl8, false) => {
("uxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Uxtl8, true) => {
("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Uxtl16, false) => {
("uxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Uxtl16, true) => {
("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Uxtl32, false) => {
("uxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Uxtl32, true) => {
("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
@ -3514,7 +3700,13 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, src_idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
&Inst::VecMiscNarrow {
op,
rd,
rn,
size,
high_half,
} => {
let dest_size = if high_half {
assert!(size.is_128bits());
size
@ -3583,11 +3775,11 @@ impl Inst {
};
let rd_size = match alu_op {
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
_ => size
_ => size,
};
let rn_size = match alu_op {
VecALUOp::Smull => size.halve(),
_ => size
_ => size,
};
let rm_size = rn_size;
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
@ -3628,6 +3820,7 @@ impl Inst {
VecMisc2::Frintz => ("frintz", size),
VecMisc2::Frintm => ("frintm", size),
VecMisc2::Frintp => ("frintp", size),
VecMisc2::Cnt => ("cnt", size),
};
let rd_size = if is_shll { size.widen() } else { size };
@ -3645,7 +3838,13 @@ impl Inst {
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecShiftImm { op, rd, rn, size, imm } => {
&Inst::VecShiftImm {
op,
rd,
rn,
size,
imm,
} => {
let op = match op {
VecShiftImmOp::Shl => "shl",
VecShiftImmOp::Ushr => "ushr",
@ -3698,7 +3897,10 @@ impl Inst {
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
let cond = cond.show_rru(mb_rru);
format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
format!(
"vcsel {}, {}, {}, {} (if-then-else diamond)",
rd, rn, rm, cond
)
}
&Inst::MovToNZCV { rn } => {
let rn = rn.show_rru(mb_rru);
@ -3711,63 +3913,60 @@ impl Inst {
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits >= 8 => {
// Is the destination a 32-bit register? Corresponds to whether
// extend-to width is <= 32 bits, *unless* we have an unsigned
// 32-to-64-bit extension, which is implemented with a "mov" to a
// 32-bit (W-reg) dest, because this zeroes the top 32 bits.
let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
OperandSize::Size32
} else {
OperandSize::from_bits(to_bits)
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
let op = match (signed, from_bits, to_bits) {
(false, 8, 32) => "uxtb",
(true, 8, 32) => "sxtb",
(false, 16, 32) => "uxth",
(true, 16, 32) => "sxth",
(false, 8, 64) => "uxtb",
(true, 8, 64) => "sxtb",
(false, 16, 64) => "uxth",
(true, 16, 64) => "sxth",
(false, 32, 64) => "mov", // special case (see above).
(true, 32, 64) => "sxtw",
_ => panic!("Unsupported Extend case: {:?}", self),
};
format!("{} {}, {}", op, rd, rn)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} if from_bits == 1 && signed => {
let dest_size = OperandSize::from_bits(to_bits);
let zr = if dest_size.is32() { "wzr" } else { "xzr" };
let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
signed: false,
from_bits: 1,
..
} if from_bits == 1 && !signed => {
} => {
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("and {}, {}, #1", rd, rn)
}
&Inst::Extend { .. } => {
panic!("Unsupported Extend case");
&Inst::Extend {
rd,
rn,
signed: false,
from_bits: 32,
to_bits: 64,
} => {
// The case of a zero extension from 32 to 64 bits is implemented
// with a "mov" to a 32-bit (W-reg) dest, because this zeroes
// the top 32 bits.
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
format!("mov {}, {}", rd, rn)
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} => {
assert!(from_bits <= to_bits);
let op = match (signed, from_bits) {
(false, 8) => "uxtb",
(true, 8) => "sxtb",
(false, 16) => "uxth",
(true, 16) => "sxth",
(true, 32) => "sxtw",
(true, _) => "sbfx",
(false, _) => "ubfx",
};
if op == "sbfx" || op == "ubfx" {
let dest_size = OperandSize::from_bits(to_bits);
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, dest_size);
format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits)
} else {
let dest_size = if signed {
OperandSize::from_bits(to_bits)
} else {
OperandSize::Size32
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
format!("{} {}, {}", op, rd, rn)
}
}
&Inst::Call { .. } => format!("bl 0"),
&Inst::CallInd { ref info, .. } => {
@ -3878,9 +4077,12 @@ impl Inst {
for inst in mem_insts.into_iter() {
ret.push_str(&inst.show_rru(mb_rru));
}
let (reg, offset) = match mem {
AMode::Unscaled(r, simm9) => (r, simm9.value()),
AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
let (reg, index_reg, offset) = match mem {
AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
AMode::UnsignedOffset(r, uimm12scaled) => {
(r, None, uimm12scaled.value() as i32)
}
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
let abs_offset = if offset < 0 {
@ -3894,8 +4096,18 @@ impl Inst {
ALUOp::Add64
};
if offset == 0 {
let mov = Inst::mov(rd, reg);
if let Some((idx, extendop)) = index_reg {
let add = Inst::AluRRRExtend {
alu_op: ALUOp::Add64,
rd,
rn: reg,
rm: idx,
extendop,
};
ret.push_str(&add.show_rru(mb_rru));
} else if offset == 0 {
let mov = Inst::gen_move(rd, reg, I64);
ret.push_str(&mov.show_rru(mb_rru));
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
let add = Inst::AluRRImm12 {
@ -3925,6 +4137,14 @@ impl Inst {
format!("virtual_sp_offset_adjust {}", offset)
}
&Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
&Inst::ValueLabelMarker { label, reg } => {
format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
}
&Inst::Unwind { ref inst } => {
format!("unwind {:?}", inst)
}
}
}
}

View file

@ -1,201 +1,2 @@
use super::*;
use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst};
use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
use crate::machinst::UnwindInfoContext;
use crate::result::CodegenResult;
use alloc::vec::Vec;
use regalloc::Reg;
#[cfg(feature = "unwind")]
pub(crate) mod systemv;
pub struct AArch64UnwindInfo;
impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo {
fn create_unwind_info(
context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
let word_size = 8u8;
let pair_size = word_size * 2;
let mut codes = Vec::new();
for i in context.prologue.clone() {
let i = i as usize;
let inst = &context.insts[i];
let offset = context.insts_layout[i];
match inst {
Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(rn, imm7),
..
} if *rt == regs::fp_reg()
&& *rt2 == regs::link_reg()
&& *rn == regs::writable_stack_reg()
&& imm7.value == -(pair_size as i16) =>
{
// stp fp (x29), lr (x30), [sp, #-16]!
codes.push((
offset,
UnwindCode::StackAlloc {
size: pair_size as u32,
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt,
stack_offset: 0,
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt2,
stack_offset: word_size as u32,
},
));
}
Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(rn, imm7),
..
} if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => {
// stp r1, r2, [sp, #(i * #16)]
let stack_offset = imm7.value as u32;
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt,
stack_offset,
},
));
if *rt2 != regs::zero_reg() {
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *rt2,
stack_offset: stack_offset + word_size as u32,
},
));
}
}
Inst::AluRRImm12 {
alu_op: ALUOp::Add64,
rd,
rn,
imm12:
Imm12 {
bits: 0,
shift12: false,
},
} if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => {
// mov fp (x29), sp.
codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() }));
}
Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => {
codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 }));
}
_ => {}
}
}
// TODO epilogues
let prologue_size = if context.prologue.is_empty() {
0
} else {
context.insts_layout[context.prologue.end as usize - 1]
};
Ok(Some(UnwindInfo {
prologue_size,
prologue_unwind_codes: codes,
epilogues_unwind_codes: vec![],
function_size: context.len,
word_size,
initial_sp_offset: 0,
}))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let result = context.mach_compile_result.unwrap();
let unwind_info = result.unwind_info.unwrap();
assert_eq!(
unwind_info,
UnwindInfo {
prologue_size: 12,
prologue_unwind_codes: vec![
(4, UnwindCode::StackAlloc { size: 16 }),
(
4,
UnwindCode::SaveRegister {
reg: regs::fp_reg(),
stack_offset: 0
}
),
(
4,
UnwindCode::SaveRegister {
reg: regs::link_reg(),
stack_offset: 8
}
),
(
8,
UnwindCode::SetFramePointer {
reg: regs::fp_reg()
}
)
],
epilogues_unwind_codes: vec![],
function_size: 24,
word_size: 8,
initial_sp_offset: 0,
}
);
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
}

View file

@ -1,9 +1,7 @@
//! Unwind information for System V ABI (Aarch64).
use crate::isa::aarch64::inst::regs;
use crate::isa::unwind::input;
use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
use crate::result::CodegenResult;
use crate::isa::unwind::systemv::RegisterMappingError;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
use regalloc::{Reg, RegClass};
@ -31,128 +29,40 @@ pub fn create_cie() -> CommonInformationEntry {
/// Map Cranelift registers to their corresponding Gimli registers.
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
// For AArch64 DWARF register mappings, see:
//
// https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
//
// X0--X31 is 0--31; V0--V31 is 64--95.
match reg.get_class() {
RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())),
RegClass::I64 => {
let reg = reg.get_hw_encoding() as u16;
Ok(Register(reg))
}
RegClass::V128 => {
let reg = reg.get_hw_encoding() as u16;
Ok(Register(64 + reg))
}
_ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
}
}
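// Editorial sketch (not part of the patch): with the AAPCS64 DWARF
// numbering cited above,
//
//     map_reg(regs::xreg(3)) == Ok(Register(3))
//     map_reg(regs::vreg(5)) == Ok(Register(69))   // 64 + 5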
pub(crate) fn create_unwind_info(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Option<UnwindInfo>> {
struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
regs::stack_reg().get_hw_encoding().into()
}
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
let map = RegisterMapper;
Ok(Some(UnwindInfo::build(unwind, &map)?))
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
StackSlotKind,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
fn sp(&self) -> u16 {
regs::stack_reg().get_hw_encoding().into()
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
fn fp(&self) -> u16 {
regs::fp_reg().get_hw_encoding().into()
}
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("aarch64"))
.expect("expect aarch64 ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
fn lr(&self) -> Option<u16> {
Some(regs::link_reg().get_hw_encoding().into())
}
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
fn lr_offset(&self) -> Option<u32> {
Some(8)
}
}
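// Editor's note (derived from the FDE strings asserted in the tests above):
// after `stp x29, x30, [sp, #-16]!` the CFA sits 16 bytes above the new SP,
// so this mapper yields FP (x29 = DWARF reg 29) saved at CFA-16 and, since
// `lr_offset()` is 8, LR (x30 = DWARF reg 30) at CFA-8 -- the
// `Offset(Register(29), -16)` / `Offset(Register(30), -8)` pair in the FDEs.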


@ -22,7 +22,7 @@ use super::lower_inst;
use crate::data_value::DataValue;
use log::{debug, trace};
use regalloc::{Reg, RegClass, Writable};
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
//============================================================================
@ -111,7 +111,7 @@ pub(crate) enum ResultRegImmShift {
/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
let input = ctx.get_input(input.insn, input.input);
let input = ctx.get_input_as_source_or_const(input.insn, input.input);
input.constant
}
@ -171,7 +171,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
debug!("put_input_in_reg: input {:?}", input);
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty_bits(ty) as u8;
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
let masked = if from_bits < 64 {
@ -179,9 +179,9 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
} else {
c
};
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
@ -189,14 +189,15 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
ctx.put_input_in_regs(input.insn, input.input)
.only_reg()
.unwrap()
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -207,7 +208,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -224,7 +225,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -236,7 +237,7 @@ pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
@ -272,7 +273,7 @@ fn put_input_in_rs<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRS {
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
@ -305,7 +306,7 @@ fn put_input_in_rse<C: LowerCtx<I = Inst>>(
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSE {
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
if let Some((insn, 0)) = inputs.inst {
let op = ctx.data(insn).opcode();
let out_ty = ctx.output_ty(insn, 0);
@ -697,7 +698,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
/* addends64.len() == 0 */
{
if addends32.len() > 0 {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg1, extendop) = addends32.pop().unwrap();
let signed = match extendop {
ExtendOp::SXTW => true,
@ -719,7 +720,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
} else
/* addends32.len() == 0 */
{
let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
lower_constant_u64(ctx, off_reg, offset as u64);
offset = 0;
AMode::reg(off_reg.to_reg())
@ -735,7 +736,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
}
// Allocate the temp and shoehorn it into the AMode.
let addr = ctx.alloc_tmp(RegClass::I64, I64);
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg, memarg) = match memarg {
AMode::RegExtended(r1, r2, extendop) => {
(r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
@ -783,7 +784,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// If the register is the stack reg, we must move it to another reg
// before adding it.
let reg = if reg == stack_reg() {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
tmp.to_reg()
} else {
@ -825,7 +826,7 @@ pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f32,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
@ -837,7 +838,7 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f64,
) {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
@ -854,12 +855,12 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
// is potentially expensive.
ctx.emit(Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(),
imm: ASIMDMovModImm::zero(ScalarSize::Size8),
invert: false,
size: VectorSize::Size8x16,
});
} else {
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
@ -886,7 +887,7 @@ pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
),
None => (value, size),
};
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
ctx.emit(inst);
@ -1052,7 +1053,7 @@ pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input(input.insn, input.input);
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
debug!(
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
input, inputs, op
@ -1092,14 +1093,14 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
op: Opcode,
conv: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input(input.insn, input.input);
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
if data.opcode() == conv {
let inputs = c.get_input(src_inst, 0);
let inputs = c.get_input_as_source_or_const(src_inst, 0);
if let Some((src_inst, _)) = inputs.inst {
let data = c.data(src_inst);
if data.opcode() == op {
@ -1152,24 +1153,77 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
}
}
/// Convert a 0 / 1 result, such as from a conditional-set instruction, into a 0
/// / -1 (all-ones) result as expected for bool operations.
pub(crate) fn normalize_bool_result<C: LowerCtx<I = Inst>>(
/// Materialize a boolean value into a register from the flags
/// (e.g., set by a comparison), producing a 0 / -1 (all-ones)
/// result as expected for bool operations.
pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
rd: Writable<Reg>,
cond: Cond,
) {
// A boolean is 0 / -1; if output width is > 1, negate.
// A boolean is 0 / -1; if output width is > 1 use `csetm`,
// otherwise use `cset`.
if ty_bits(ctx.output_ty(insn, 0)) > 1 {
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd,
rn: zero_reg(),
rm: rd.to_reg(),
});
ctx.emit(Inst::CSetm { rd, cond });
} else {
ctx.emit(Inst::CSet { rd, cond });
}
}
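// Editor's note (illustrative): in AArch64 assembly terms the two shapes
// this helper emits are
//
//     cset  w0, <cond>    // b1 result: true -> 1
//     csetm w0, <cond>    // wider bool: true -> -1 (all ones)
//
// so a true `b32` comes out as 0xFFFF_FFFF, matching the 0 / -1 convention.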
/// This is target-word-size dependent, and it excludes booleans and reftypes.
pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
match ty {
I8 | I16 | I32 | I64 => true,
_ => false,
}
}
fn load_op_to_ty(op: Opcode) -> Option<Type> {
match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8),
Opcode::Sload16 | Opcode::Uload16 | Opcode::Sload16Complex | Opcode::Uload16Complex => {
Some(I16)
}
Opcode::Sload32 | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => {
Some(I32)
}
Opcode::Load | Opcode::LoadComplex => None,
Opcode::Sload8x8 | Opcode::Uload8x8 | Opcode::Sload8x8Complex | Opcode::Uload8x8Complex => {
Some(I8X8)
}
Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload16x4Complex => Some(I16X4),
Opcode::Sload32x2
| Opcode::Uload32x2
| Opcode::Sload32x2Complex
| Opcode::Uload32x2Complex => Some(I32X2),
_ => None,
}
}
/// Helper to lower a load instruction; this is used in several places, because
/// a load can sometimes be merged into another operation.
pub(crate) fn lower_load<C: LowerCtx<I = Inst>, F: FnMut(&mut C, Writable<Reg>, Type, AMode)>(
ctx: &mut C,
ir_inst: IRInst,
inputs: &[InsnInput],
output: InsnOutput,
mut f: F,
) {
let op = ctx.data(ir_inst).opcode();
let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));
let off = ctx.data(ir_inst).load_store_offset().unwrap();
let mem = lower_address(ctx, elem_ty, &inputs[..], off);
let rd = get_output_reg(ctx, output).only_reg().unwrap();
f(ctx, rd, elem_ty, mem);
}
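// Editor's sketch of a call site (hedged: `Inst::ULoad32` and the exact
// field names stand in for whatever the lowering rule actually selects).
// A rule for a 32-bit unsigned load might call:
//
//     lower_load(ctx, insn, &inputs[..], outputs[0], |ctx, rd, _ty, mem| {
//         ctx.emit(Inst::ULoad32 { rd, mem, flags: MemFlags::trusted() });
//     });
//
// The helper resolves the element type and address mode once, so each rule
// only supplies the final instruction.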
//=============================================================================
// Lowering-backend trait implementation.
@ -1177,7 +1231,7 @@ impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
@ -1185,9 +1239,8 @@ impl LowerBackend for AArch64Backend {
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
lower_inst::lower_branch(ctx, branches, targets)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {

(Diff for this file not shown because of its large size.)


@ -2,13 +2,13 @@
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::aarch64::settings as aarch64_settings;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use crate::settings as shared_settings;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
@ -17,6 +17,7 @@ mod abi;
pub(crate) mod inst;
mod lower;
mod lower_inst;
mod settings;
use inst::create_reg_universe;
@ -25,17 +26,23 @@ use self::inst::EmitInfo;
/// An AArch64 backend.
pub struct AArch64Backend {
triple: Triple,
flags: settings::Flags,
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
reg_universe: RealRegUniverse,
}
impl AArch64Backend {
/// Create a new AArch64 backend with the given (shared) flags.
pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
pub fn new_with_flags(
triple: Triple,
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
) -> AArch64Backend {
let reg_universe = create_reg_universe(&flags);
AArch64Backend {
triple,
flags,
isa_flags,
reg_universe,
}
}
@ -45,7 +52,7 @@ impl AArch64Backend {
fn compile_vcode(
&self,
func: &Function,
flags: settings::Flags,
flags: shared_settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> {
let emit_info = EmitInfo::new(flags.clone());
let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);
@ -64,7 +71,7 @@ impl MachBackend for AArch64Backend {
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let unwind_info = vcode.unwind_info()?;
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe(flags))))
@ -78,7 +85,8 @@ impl MachBackend for AArch64Backend {
buffer,
frame_size,
disasm,
unwind_info,
value_labels_ranges: Default::default(),
stackslot_offsets,
})
}
@ -90,10 +98,19 @@ impl MachBackend for AArch64Backend {
self.triple.clone()
}
fn flags(&self) -> &settings::Flags {
fn flags(&self) -> &shared_settings::Flags {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);
}
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}
@ -119,11 +136,18 @@ impl MachBackend for AArch64Backend {
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
Ok(match (result.unwind_info.as_ref(), kind) {
(Some(info), UnwindInfoKind::SystemV) => {
inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data.len(),
&mapper,
)?,
))
}
(Some(_info), UnwindInfoKind::Windows) => {
UnwindInfoKind::Windows => {
// TODO: support Windows unwind info on AArch64
None
}
@ -142,9 +166,10 @@ pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
IsaBuilder {
triple,
setup: settings::builder(),
constructor: |triple, shared_flags, _| {
let backend = AArch64Backend::new_with_flags(triple, shared_flags);
setup: aarch64_settings::builder(),
constructor: |triple, shared_flags, builder| {
let isa_flags = aarch64_settings::Flags::new(&shared_flags, builder);
let backend = AArch64Backend::new_with_flags(triple, shared_flags, isa_flags);
Box::new(TargetIsaAdapter::new(backend))
},
}
@ -179,11 +204,14 @@ mod test {
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
shared_flags,
isa_flags,
);
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = &buffer.data[..];
@ -192,12 +220,11 @@ mod test {
// mov x29, sp
// mov x1, #0x1234
// add w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
0x01, 0x0b, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden[..]);
@ -234,11 +261,14 @@ mod test {
let v3 = pos.ins().isub(v1, v0);
pos.ins().return_(&[v3]);
let mut shared_flags = settings::builder();
shared_flags.set("opt_level", "none").unwrap();
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = aarch64_settings::Flags::new(&shared_flags, aarch64_settings::builder());
let backend = AArch64Backend::new_with_flags(
Triple::from_str("aarch64").unwrap(),
settings::Flags::new(shared_flags),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
@ -259,14 +289,13 @@ mod test {
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// mov sp, x29
// ldp x29, x30, [sp], #16
// ret
let golden = vec![
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
193, 168, 192, 3, 95, 214,
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 253, 123, 193, 168, 192, 3,
95, 214,
];
assert_eq!(code, &golden[..]);


@ -0,0 +1,9 @@
//! AArch64 Settings.
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/settings.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));


@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
/// Support for the ARM ABI from the callee side (within a function body).
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
fn compute_arg_locs(
_call_conv: isa::CallConv,
_flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@ -81,7 +82,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
if next_rreg < max_reg_val {
let reg = rreg(next_rreg);
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
reg.to_real_reg(),
param.value_type,
param.extension,
@ -101,7 +102,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_rreg < max_reg_val {
ret.push(ABIArg::Reg(
ret.push(ABIArg::reg(
rreg(next_rreg).to_real_reg(),
I32,
ir::ArgumentExtension::None,
@ -124,7 +125,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
let max_stack = next_stack;
for (ty, ext, purpose) in stack_args.into_iter().rev() {
next_stack -= 4;
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
(max_stack - next_stack) as i64,
ty,
ext,
@ -185,7 +186,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::EpiloguePlaceholder
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
if let Some(imm12) = UImm12::maybe_from_i64(imm as i64) {
@ -209,7 +210,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
insts
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
let mut insts = SmallVec::new();
insts.push(Inst::Cmp {
rn: sp_reg(),
@ -243,7 +244,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::gen_store(from_reg, mem, ty)
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
if amount == 0 {
@ -283,7 +284,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
Inst::VirtualSPOffsetAdj { offset }
}
fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
fn gen_prologue_frame_setup(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
let reg_list = vec![fp_reg(), lr_reg()];
ret.push(Inst::Push { reg_list });
@ -294,7 +295,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
let mut ret = SmallVec::new();
ret.push(Inst::Mov {
rd: writable_sp_reg(),
@ -305,6 +306,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
// TODO: implement if we ever require stack probes on ARM32 (unlikely
// unless Lucet is ported)
smallvec![]
}
/// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that.
fn gen_clobber_save(
@ -312,7 +319,6 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
if fixed_frame_storage_size > 0 {
@ -342,7 +348,6 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_vec = get_callee_saves(clobbers);
@ -420,6 +425,15 @@ impl ABIMachineSpec for Arm32MachineDeps {
insts
}
fn gen_memcpy(
_call_conv: isa::CallConv,
_dst: Reg,
_src: Reg,
_size: usize,
) -> SmallVec<[Self::I; 8]> {
unimplemented!("StructArgs not implemented for ARM32 yet");
}
fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
match rc {
RegClass::I32 => 1,
@ -445,6 +459,13 @@ impl ABIMachineSpec for Arm32MachineDeps {
}
caller_saved
}
fn get_ext_mode(
_call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
specified
}
}
fn is_callee_save(r: RealReg) -> bool {


@ -286,7 +286,6 @@ impl MachInstEmitInfo for EmitInfo {
impl MachInstEmit for Inst {
type Info = EmitInfo;
type State = EmitState;
type UnwindInfo = super::unwind::Arm32UnwindInfo;
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
let start_off = sink.cur_offset();


@ -22,7 +22,6 @@ mod emit;
pub use self::emit::*;
mod regs;
pub use self::regs::*;
pub mod unwind;
#[cfg(test)]
mod emit_tests;
@ -807,12 +806,17 @@ impl MachInst for Inst {
Inst::mov(to_reg, from_reg)
}
fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
to_reg: Writable<Reg>,
value: u64,
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
to_regs: ValueRegs<Writable<Reg>>,
value: u128,
ty: Type,
_alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported yet");
let value = value as u64;
match ty {
B1 | I8 | B8 | I16 | B16 | I32 | B32 => {
let v: i64 = value as i64;
@ -826,11 +830,10 @@ impl MachInst for Inst {
}
}
fn gen_zero_len_nop() -> Inst {
Inst::Nop0
}
fn gen_nop(preferred_size: usize) -> Inst {
if preferred_size == 0 {
return Inst::Nop0;
}
assert!(preferred_size >= 2);
Inst::Nop2
}
@ -839,10 +842,10 @@ impl MachInst for Inst {
None
}
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 | I16 | I32 | B1 | B8 | B16 | B32 => Ok(RegClass::I32),
IFLAGS => Ok(RegClass::I32),
I8 | I16 | I32 | B1 | B8 | B16 | B32 => Ok((&[RegClass::I32], &[I32])),
IFLAGS => Ok((&[RegClass::I32], &[I32])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty


@ -1,14 +0,0 @@
use super::*;
use crate::isa::unwind::input::UnwindInfo;
use crate::result::CodegenResult;
pub struct Arm32UnwindInfo;
impl UnwindInfoGenerator<Inst> for Arm32UnwindInfo {
fn create_unwind_info(
_context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
// TODO
Ok(None)
}
}


@ -13,7 +13,7 @@ use crate::isa::arm32::Arm32Backend;
use super::lower_inst;
use regalloc::{Reg, RegClass, Writable};
use regalloc::{Reg, Writable};
//============================================================================
// Lowering: convert instruction outputs to result types.
@ -55,7 +55,7 @@ pub(crate) enum NarrowValueMode {
/// Lower an instruction output to a reg.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.get_output(out.insn, out.output)
ctx.get_output(out.insn, out.output).only_reg().unwrap()
}
/// Lower an instruction input to a reg.
@ -68,24 +68,27 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
) -> Reg {
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty.bits() as u8;
let inputs = ctx.get_input(input.insn, input.input);
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
let in_reg = if let Some(c) = inputs.constant {
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
for inst in Inst::gen_constant(to_reg, c, ty, |reg_class, ty| ctx.alloc_tmp(reg_class, ty))
.into_iter()
let to_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::gen_constant(ValueRegs::one(to_reg), c as u128, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
ctx.emit(inst);
}
to_reg.to_reg()
} else {
ctx.use_input_reg(inputs);
inputs.reg
ctx.put_input_in_regs(input.insn, input.input)
.only_reg()
.unwrap()
};
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend, 1) => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::AluRRImm8 {
alu_op: ALUOp::And,
rd: tmp,
@ -95,7 +98,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::ZeroExtend, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rm: in_reg,
@ -105,7 +108,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
tmp.to_reg()
}
(NarrowValueMode::SignExtend, n) if n < 32 => {
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rm: in_reg,
@ -221,7 +224,7 @@ impl LowerBackend for Arm32Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
@ -229,9 +232,8 @@ impl LowerBackend for Arm32Backend {
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
lower_inst::lower_branch(ctx, branches, targets)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {


@ -5,12 +5,12 @@ use crate::ir::Inst as IRInst;
use crate::ir::Opcode;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use crate::isa::arm32::abi::*;
use crate::isa::arm32::inst::*;
use regalloc::RegClass;
use smallvec::SmallVec;
use super::lower::*;
@ -19,6 +19,7 @@ use super::lower::*;
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
flags: &Flags,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
@ -143,7 +144,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let tmp = ctx.alloc_tmp(RegClass::I32, I32);
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
// ror rd, rn, 32 - (rm & 31)
ctx.emit(Inst::AluRRImm8 {
@ -171,7 +172,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match ty {
I32 => {
let rd_hi = output_to_reg(ctx, outputs[0]);
let rd_lo = ctx.alloc_tmp(RegClass::I32, ty);
let rd_lo = ctx.alloc_tmp(ty).only_reg().unwrap();
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
@ -316,7 +317,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trueif => {
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -344,7 +345,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
// Verification ensures that the input is always a single-def ifcmp.
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -471,7 +472,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Trapif => {
let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input)
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.unwrap()
.0;
@ -487,7 +488,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::FallthroughReturn | Opcode::Return => {
for (i, input) in inputs.iter().enumerate() {
let reg = input_to_reg(ctx, *input, NarrowValueMode::None);
let retval_reg = ctx.retval(i);
let retval_reg = ctx.retval(i).only_reg().unwrap();
let ty = ctx.input_ty(insn, i);
ctx.emit(Inst::gen_move(retval_reg, reg, ty));
@ -503,7 +504,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv)?,
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
&inputs[..],
)
}
@ -513,7 +514,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
&inputs[1..],
)
}
@ -522,12 +523,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), abi.num_args());
for (i, input) in inputs.iter().enumerate().filter(|(i, _)| *i <= 3) {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg));
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
let retval_reg = output_to_reg(ctx, *output);
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg));
}
}
_ => panic!("lowering {} unimplemented!", op),
@ -540,7 +541,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
ctx: &mut C,
branches: &[IRInst],
targets: &[MachLabel],
fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
// A block should end with at most two branches. The first may be a
// conditional branch; a conditional branch can be followed only by an
@ -557,11 +557,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
let not_taken = BranchTarget::Label(targets[1]);
match op0 {
Opcode::Brz | Opcode::Brnz => {
let rn = input_to_reg(


@ -7,7 +7,8 @@ use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter,
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Architecture, ArmArchitecture, Triple};
@ -60,6 +61,7 @@ impl MachBackend for Arm32Backend {
let vcode = self.compile_vcode(func, flags.clone())?;
let buffer = vcode.emit();
let frame_size = vcode.frame_size();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe())))
@ -73,7 +75,8 @@ impl MachBackend for Arm32Backend {
buffer,
frame_size,
disasm,
unwind_info: None,
value_labels_ranges: Default::default(),
stackslot_offsets,
})
}
@ -89,6 +92,14 @@ impl MachBackend for Arm32Backend {
&self.flags
}
fn isa_flags(&self) -> Vec<settings::Value> {
Vec::new()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
}
fn reg_universe(&self) -> &RealRegUniverse {
&self.reg_universe
}


@ -10,22 +10,24 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum CallConv {
/// Best performance, not ABI-stable
/// Best performance, not ABI-stable.
Fast,
/// Smallest caller code size, not ABI-stable
/// Smallest caller code size, not ABI-stable.
Cold,
/// System V-style convention used on many platforms
/// System V-style convention used on many platforms.
SystemV,
/// Windows "fastcall" convention, also used for x64 and ARM
/// Windows "fastcall" convention, also used for x64 and ARM.
WindowsFastcall,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV
/// Mac aarch64 calling convention, which is a tweaked aarch64 ABI.
AppleAarch64,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV.
BaldrdashSystemV,
/// SpiderMonkey WebAssembly convention on Windows
/// SpiderMonkey WebAssembly convention on Windows.
BaldrdashWindows,
/// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS
/// register slots in the frame.
Baldrdash2020,
/// Specialized convention for the probestack function
/// Specialized convention for the probestack function.
Probestack,
}
@ -36,6 +38,7 @@ impl CallConv {
// Default to System V for unknown targets because most everything
// uses System V.
Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV,
Ok(CallingConvention::AppleAarch64) => Self::AppleAarch64,
Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall,
Ok(unimp) => unimplemented!("calling convention: {:?}", unimp),
}
@ -49,6 +52,7 @@ impl CallConv {
LibcallCallConv::Cold => Self::Cold,
LibcallCallConv::SystemV => Self::SystemV,
LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
LibcallCallConv::AppleAarch64 => Self::AppleAarch64,
LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020,
@ -80,6 +84,7 @@ impl fmt::Display for CallConv {
Self::Cold => "cold",
Self::SystemV => "system_v",
Self::WindowsFastcall => "windows_fastcall",
Self::AppleAarch64 => "apple_aarch64",
Self::BaldrdashSystemV => "baldrdash_system_v",
Self::BaldrdashWindows => "baldrdash_windows",
Self::Baldrdash2020 => "baldrdash_2020",
@ -96,6 +101,7 @@ impl str::FromStr for CallConv {
"cold" => Ok(Self::Cold),
"system_v" => Ok(Self::SystemV),
"windows_fastcall" => Ok(Self::WindowsFastcall),
"apple_aarch64" => Ok(Self::AppleAarch64),
"baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
"baldrdash_windows" => Ok(Self::BaldrdashWindows),
"baldrdash_2020" => Ok(Self::Baldrdash2020),


@ -6,6 +6,9 @@ use crate::isa::constraints::{BranchRange, RecipeConstraints};
use crate::regalloc::RegDiversions;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
@ -13,6 +16,7 @@ use core::fmt;
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Encoding {
recipe: u16,
bits: u16,


@ -20,7 +20,6 @@
//! appropriate for the requested ISA:
//!
//! ```
//! # extern crate cranelift_codegen;
//! # #[macro_use] extern crate target_lexicon;
//! use cranelift_codegen::isa;
//! use cranelift_codegen::settings::{self, Configurable};
@ -30,12 +29,12 @@
//! let shared_builder = settings::builder();
//! let shared_flags = settings::Flags::new(shared_builder);
//!
//! match isa::lookup(triple!("riscv32")) {
//! match isa::lookup(triple!("x86_64")) {
//! Err(_) => {
//! // The RISC-V target ISA is not available.
//! // The x86_64 target ISA is not available.
//! }
//! Ok(mut isa_builder) => {
//! isa_builder.set("supports_m", "on");
//! isa_builder.set("use_popcnt", "on");
//! let isa = isa_builder.finish(shared_flags);
//! }
//! }
@ -64,21 +63,26 @@ use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::fmt::{Debug, Formatter};
use core::hash::Hasher;
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
use thiserror::Error;
#[cfg(feature = "riscv")]
mod riscv;
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
// included whenever building with x86 support. The new backend is the default,
// but the old can be requested with `BackendVariant::Legacy`. However, if this
// crate is built with the `old-x86-backend` feature, then the old backend is
// the default instead.
#[cfg(feature = "x86")]
mod x86;
#[cfg(feature = "x64")]
#[cfg(feature = "x86")]
mod x64;
#[cfg(feature = "arm32")]
@ -102,36 +106,68 @@ mod test_utils;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
($name: ident, $feature: tt, $triple: ident) => {{
#[cfg(feature = $feature)]
($name: ident, $cfg_terms: tt, $triple: ident) => {{
#[cfg $cfg_terms]
{
Ok($name::isa_builder($triple))
}
#[cfg(not(feature = $feature))]
#[cfg(not $cfg_terms)]
{
Err(LookupError::SupportDisabled)
}
}};
}
/// The "variant" for a given target. On one platform (x86-64), we have two
/// backends, the "old" and "new" one; the new one is the default if included
/// in the build configuration and not otherwise specified.
#[derive(Clone, Copy, Debug)]
pub enum BackendVariant {
/// Any backend available.
Any,
/// A "legacy" backend: one that operates using legalizations and encodings.
Legacy,
/// A backend built on `MachInst`s and the `VCode` framework.
MachInst,
}
impl Default for BackendVariant {
fn default() -> Self {
BackendVariant::Any
}
}
/// Look for an ISA for the given `triple`, selecting the backend variant given
/// by `variant` if available.
pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
match (triple.architecture, variant) {
(Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => {
isa_builder!(riscv, (feature = "riscv"), triple)
}
(Architecture::X86_64, BackendVariant::Legacy) => {
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::X86_64, BackendVariant::MachInst) => {
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(not(feature = "old-x86-backend"))]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(feature = "old-x86-backend")]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
(Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
_ => Err(LookupError::Unsupported),
}
}
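// Editor's sketch (hedged usage; assumes the crate was built with the "x86"
// feature): callers that need the new backend regardless of the build-time
// default can pin it explicitly,
//
//     use cranelift_codegen::isa::{lookup_variant, BackendVariant};
//     use target_lexicon::triple;
//
//     let isa_builder = lookup_variant(triple!("x86_64"), BackendVariant::MachInst)
//         .expect("x86_64 MachInst backend compiled in");
//
// while `lookup()` below keeps the `BackendVariant::Any` behavior.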
/// Look for an ISA for the given `triple`.
/// Return a builder that can create a corresponding `TargetIsa`.
pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
match triple.architecture {
Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
isa_builder!(riscv, "riscv", triple)
}
Architecture::X86_32 { .. } | Architecture::X86_64 => {
if cfg!(feature = "x64") {
isa_builder!(x64, "x64", triple)
} else {
isa_builder!(x86, "x86", triple)
}
}
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
_ => Err(LookupError::Unsupported),
}
lookup_variant(triple, BackendVariant::Any)
}
/// Look for a supported ISA with the given `name`.
@ -163,6 +199,16 @@ pub struct Builder {
}
impl Builder {
/// Gets the triple for the builder.
pub fn triple(&self) -> &Triple {
&self.triple
}
/// Iterates the available settings in the builder.
pub fn iter(&self) -> impl Iterator<Item = settings::Setting> {
self.setup.iter()
}
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
/// fully configured `TargetIsa` trait object.
pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
@ -227,11 +273,31 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Get the ISA-dependent flag values that were used to make this trait object.
fn isa_flags(&self) -> Vec<settings::Value>;
/// Get the variant of this ISA (Legacy or MachInst).
fn variant(&self) -> BackendVariant {
BackendVariant::Legacy
}
/// Hashes all flags, both ISA-independent and ISA-specific, into the
/// specified hasher.
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
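// Editor's sketch (hedged): a consumer holding `isa: &dyn TargetIsa` can
// fingerprint the complete configuration, e.g. for an object-cache key:
//
//     use std::collections::hash_map::DefaultHasher;
//     use std::hash::Hasher;
//
//     let mut h = DefaultHasher::new();
//     isa.hash_all_flags(&mut h);
//     let cache_key = h.finish();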
/// Get the default calling convention of this target.
fn default_call_conv(&self) -> CallConv {
CallConv::triple_default(self.triple())
}
/// Get the endianness of this ISA.
fn endianness(&self) -> ir::Endianness {
match self.triple().endianness().unwrap() {
target_lexicon::Endianness::Little => ir::Endianness::Little,
target_lexicon::Endianness::Big => ir::Endianness::Big,
}
}
/// Get the pointer type of this ISA.
fn pointer_type(&self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
@ -279,6 +345,12 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
Err(RegisterMappingError::UnsupportedArchitecture)
}
#[cfg(feature = "unwind")]
/// Map a regalloc::Reg to its corresponding DWARF register.
fn map_regalloc_reg_to_dwarf(&self, _: ::regalloc::Reg) -> Result<u16, RegisterMappingError> {
Err(RegisterMappingError::UnsupportedArchitecture)
}
/// Returns an iterator over legal encodings for the instruction.
fn legal_encodings<'a>(
&'a self,


@ -15,10 +15,10 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::hash::{Hash, Hasher};
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
@ -69,6 +69,15 @@ impl TargetIsa for Isa {
&self.shared_flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.shared_flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}


@ -1,6 +1,6 @@
//! RISC-V Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a


@ -1,4 +1,7 @@
//! Represents information relating to function unwinding.
use regalloc::RealReg;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
@ -66,6 +69,11 @@ pub mod input {
RememberState,
/// Restores the state.
RestoreState,
/// On aarch64 ARMv8.3+ devices, enables or disables pointer authentication.
Aarch64SetPointerAuth {
/// Whether return addresses (held in LR) contain a pointer-authentication code.
return_addresses: bool,
},
}
/// Unwind information as generated by a backend.
@ -86,3 +94,155 @@ pub mod input {
pub initial_sp_offset: u8,
}
}
/// Unwind pseudoinstruction used in VCode backends: represents that
/// at the present location, an action has just been taken.
///
/// VCode backends always emit unwind info that is relative to a frame
/// pointer, because we are planning to allow for dynamic frame allocation,
/// and because it makes the design quite a lot simpler in general: we don't
/// have to be precise about SP adjustments throughout the body of the function.
///
/// We include only unwind info for prologues at this time. Note that unwind
/// info for epilogues is only necessary if one expects to unwind while within
/// the last few instructions of the function (after FP has been restored) or
/// if one wishes to instruction-step through the epilogue and see a backtrace
/// at every point. This is not necessary for correct operation otherwise and so
/// we simplify the world a bit by omitting epilogue information. (Note that
/// some platforms also don't require or have a way to describe unwind
/// information for epilogues at all: for example, on Windows, the `UNWIND_INFO`
/// format only stores information for the function prologue.)
///
/// Because we are defining an abstraction over multiple unwind formats (at
/// least Windows/fastcall and System V) and multiple architectures (at least
/// x86-64 and aarch64), we have to be a little bit flexible in how we describe
/// the frame. However, it turns out that a least-common-denominator prologue
/// works for all of the cases we have to worry about today!
///
/// We assume the stack looks something like this:
///
///
/// ```plain
/// +----------------------------------------------+
/// | stack arg area, etc (according to ABI) |
/// | ... |
/// SP at call --> +----------------------------------------------+
/// | return address (pushed by HW or SW) |
/// +----------------------------------------------+
/// | old frame pointer (FP) |
/// FP in this --> +----------------------------------------------+
/// function | clobbered callee-save registers |
/// | ... |
/// start of --> +----------------------------------------------+
/// clobbers | (rest of function's frame, irrelevant here) |
/// | ... |
/// SP in this --> +----------------------------------------------+
/// function
/// ```
///
/// We assume that the prologue consists of:
///
/// * `PushFrameRegs`: A push operation that adds the old FP to the stack (and
/// maybe the link register, on architectures that do not push return addresses
/// in hardware)
/// * `DefineNewFrame`: An update that sets FP to SP to establish a new frame
/// * `SaveReg`: A number of stores or pushes to the stack to save clobbered registers
///
/// Each of these steps has a corresponding pseudo-instruction. At each step,
/// we need some information to determine where the current stack frame is
/// relative to SP or FP. When the `PushFrameRegs` occurs, we need to know how
/// much SP was decremented by, so we can allow the unwinder to continue to find
/// the caller's frame. When we define the new frame, we need to know where FP
/// is in relation to "SP at call" and also "start of clobbers", because
/// different unwind formats define one or the other of those as the anchor by
/// which we define the frame. Finally, when registers are saved, we need to
/// know which ones, and where.
///
/// Different unwind formats work differently; here is a whirlwind tour of how
/// they define frames to help understanding:
///
/// - Windows unwind information defines a frame that must start below the
/// clobber area, because all clobber-save offsets are non-negative. We set it
/// at the "start of clobbers" in the figure above. The `UNWIND_INFO` contains
/// a "frame pointer offset" field; when we define the new frame, the frame is
/// understood to be the value of FP (`RBP`) *minus* this offset. In other
/// words, the FP is *at the frame pointer offset* relative to the
/// start-of-clobber-frame. We use the "FP offset down to clobber area" offset
/// to generate this info.
///
/// - System V unwind information defines a frame in terms of the CFA
/// (call-frame address), which is equal to the "SP at call" above. SysV
/// allows negative offsets, so there is no issue defining clobber-save
/// locations in terms of CFA. The format allows us to define CFA flexibly in
/// terms of any register plus an offset; we define it in terms of FP plus
/// the clobber-to-caller-SP offset once FP is established.
///
/// Note that certain architectures impose limits on offsets: for example, on
/// Windows, the base of the clobber area must not be more than 240 bytes below
/// FP.
///
/// Unwind pseudoinstructions are emitted inline by ABI code as it generates
/// a prologue. Thus, for the usual case, a prologue might look like (using x64
/// as an example):
///
/// ```plain
/// push rbp
/// unwind UnwindInst::PushFrameRegs { offset_upward_to_caller_sp: 16 }
/// mov rbp, rsp
/// unwind UnwindInst::DefineNewFrame { offset_upward_to_caller_sp: 16,
/// offset_downward_to_clobbers: 16 }
/// sub rsp, 32
/// mov [rsp+16], r12
/// unwind UnwindInst::SaveReg { reg: R12, clobber_offset: 0 }
/// mov [rsp+24], r13
/// unwind UnwindInst::SaveReg { reg: R13, clobber_offset: 8 }
/// ...
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum UnwindInst {
/// The frame-pointer register for this architecture has just been pushed to
/// the stack (and on architectures where return-addresses are not pushed by
/// hardware, the link register as well). The FP has not been set to this
/// frame yet. The current location of SP is such that
/// `offset_upward_to_caller_sp` is the distance to SP-at-callsite (our
/// caller's frame).
PushFrameRegs {
/// The offset from the current SP (after push) to the SP at
/// caller's callsite.
offset_upward_to_caller_sp: u32,
},
/// The frame-pointer register for this architecture has just been
/// set to the current stack location. We wish to define a new
/// frame that is anchored on this new FP value. Offsets are provided
/// upward to the caller's stack frame and downward toward the clobber
/// area. We expect this pseudo-op to come after `PushFrameRegs`.
DefineNewFrame {
/// The offset from the current SP and FP value upward to the value of
/// SP at the callsite that invoked us.
offset_upward_to_caller_sp: u32,
/// The offset from the current SP and FP value downward to the start of
/// the clobber area.
offset_downward_to_clobbers: u32,
},
/// The stack slot at the given offset from the clobber-area base has been
/// used to save the given register.
///
/// Given that `DefineNewFrame` has occurred first with some
/// `offset_downward_to_clobbers`, `SaveReg` with `clobber_offset` indicates
/// that the value of `reg` is saved on the stack at address `FP -
/// offset_downward_to_clobbers + clobber_offset`.
SaveReg {
/// The offset from the start of the clobber area to this register's
/// stack location.
clobber_offset: u32,
/// The saved register.
reg: RealReg,
},
/// Defines whether the aarch64-specific pointer authentication available on
/// ARMv8.3+ devices is enabled for certain pointers.
Aarch64SetPointerAuth {
/// Whether return addresses (held in LR) contain a pointer-authentication code.
return_addresses: bool,
},
}


@ -1,6 +1,8 @@
//! System V ABI unwind information.
use crate::binemit::CodeOffset;
use crate::isa::unwind::input;
use crate::isa::unwind::UnwindInst;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use gimli::write::{Address, FrameDescriptionEntry};
@ -42,6 +44,11 @@ pub(crate) enum CallFrameInstruction {
RememberState,
RestoreState,
ArgsSize(u32),
/// Enables or disables pointer authentication on aarch64 platforms with ARMv8.3 and later.
/// This particular item maps to `gimli::ValExpression(RA_SIGN_STATE, lit0/lit1)`.
Aarch64SetPointerAuth {
return_addresses: bool,
},
}
impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
@ -73,7 +80,7 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
fn into(self) -> gimli::write::CallFrameInstruction {
use gimli::{write::CallFrameInstruction, Register};
use gimli::{write::CallFrameInstruction, write::Expression, Register};
match self {
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
@ -90,6 +97,21 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
Self::RememberState => CallFrameInstruction::RememberState,
Self::RestoreState => CallFrameInstruction::RestoreState,
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
Self::Aarch64SetPointerAuth { return_addresses } => {
// To enable pointer authentication for return addresses in dwarf directives, we
// use a small dwarf expression that sets the value of the pseudo-register
// RA_SIGN_STATE (RA stands for return address) to 0 or 1. This behavior is
// documented in
// https://github.com/ARM-software/abi-aa/blob/master/aadwarf64/aadwarf64.rst#41dwarf-register-names.
let mut expr = Expression::new();
expr.op(if return_addresses {
gimli::DW_OP_lit1
} else {
gimli::DW_OP_lit0
});
const RA_SIGN_STATE: Register = Register(34);
CallFrameInstruction::ValExpression(RA_SIGN_STATE, expr)
}
}
}
}
@ -100,6 +122,16 @@ pub(crate) trait RegisterMapper<Reg> {
fn map(&self, reg: Reg) -> Result<Register, RegisterMappingError>;
/// Gets stack pointer register.
fn sp(&self) -> Register;
/// Gets the frame pointer register.
fn fp(&self) -> Register;
/// Gets the link register, if any.
fn lr(&self) -> Option<Register> {
None
}
/// Gets the offset from the saved FP to the saved LR, if any.
fn lr_offset(&self) -> Option<u32> {
None
}
}
/// Represents unwind information for a single System V ABI function.
@ -112,7 +144,88 @@ pub struct UnwindInfo {
len: u32,
}
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
insts: &[(CodeOffset, UnwindInst)],
code_len: usize,
mr: &MR,
) -> CodegenResult<UnwindInfo> {
let mut instructions = vec![];
let mut clobber_offset_to_cfa = 0;
for &(instruction_offset, ref inst) in insts {
match inst {
&UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp,
} => {
// Define CFA in terms of current SP (SP changed and we haven't
// set FP yet).
instructions.push((
instruction_offset,
CallFrameInstruction::CfaOffset(offset_upward_to_caller_sp as i32),
));
// Note that we saved the old FP value on the stack.
instructions.push((
instruction_offset,
CallFrameInstruction::Offset(mr.fp(), -(offset_upward_to_caller_sp as i32)),
));
// If there is a link register on this architecture, note that
// we saved it as well.
if let Some(lr) = mr.lr() {
instructions.push((
instruction_offset,
CallFrameInstruction::Offset(
lr,
-(offset_upward_to_caller_sp as i32)
+ mr.lr_offset().expect("LR offset not provided") as i32,
),
));
}
}
&UnwindInst::DefineNewFrame {
offset_upward_to_caller_sp,
offset_downward_to_clobbers,
} => {
// Define CFA in terms of FP. Note that we assume it was already
// defined correctly in terms of the current SP, and FP has just
// been set to the current SP, so we do not need to change the
// offset, only the register.
instructions.push((
instruction_offset,
CallFrameInstruction::CfaRegister(mr.fp()),
));
// Record distance from CFA downward to clobber area so we can
// express clobber offsets later in terms of CFA.
clobber_offset_to_cfa = offset_upward_to_caller_sp + offset_downward_to_clobbers;
}
&UnwindInst::SaveReg {
clobber_offset,
reg,
} => {
let reg = mr
.map(reg.to_reg())
.map_err(|e| CodegenError::RegisterMappingError(e))?;
let off = (clobber_offset as i32) - (clobber_offset_to_cfa as i32);
instructions.push((instruction_offset, CallFrameInstruction::Offset(reg, off)));
}
&UnwindInst::Aarch64SetPointerAuth { return_addresses } => {
instructions.push((
instruction_offset,
CallFrameInstruction::Aarch64SetPointerAuth { return_addresses },
));
}
}
}
Ok(UnwindInfo {
instructions,
len: code_len as u32,
})
}
impl UnwindInfo {
// TODO: remove `build()` below when old backend is removed. The new backend uses a simpler
// approach in `create_unwind_info_from_insts()` above.
pub(crate) fn build<'b, Reg: PartialEq + Copy>(
unwind: input::UnwindInfo<Reg>,
map_reg: &'b dyn RegisterMapper<Reg>,
@ -158,6 +271,9 @@ impl UnwindInfo {
UnwindCode::RestoreState => {
builder.restore_state(*offset);
}
UnwindCode::Aarch64SetPointerAuth { return_addresses } => {
builder.set_aarch64_pauth(*offset, *return_addresses);
}
}
}
@ -179,6 +295,8 @@ impl UnwindInfo {
}
}
// TODO: delete the builder below when the old backend is removed.
struct InstructionBuilder<'a, Reg: PartialEq + Copy> {
sp_offset: i32,
frame_register: Option<Reg>,
@ -310,4 +428,11 @@ impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> {
self.instructions
.push((offset, CallFrameInstruction::RestoreState));
}
fn set_aarch64_pauth(&mut self, offset: u32, return_addresses: bool) {
self.instructions.push((
offset,
CallFrameInstruction::Aarch64SetPointerAuth { return_addresses },
));
}
}

View file

@ -1,6 +1,6 @@
//! Windows x64 ABI unwind information.
use crate::isa::{unwind::input, RegUnit};
use crate::isa::unwind::input;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use byteorder::{ByteOrder, LittleEndian};
@ -8,6 +8,9 @@ use log::warn;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
use crate::binemit::CodeOffset;
use crate::isa::unwind::UnwindInst;
/// Maximum (inclusive) size of a "small" stack allocation
const SMALL_ALLOC_MAX_SIZE: u32 = 128;
/// Maximum (inclusive) size of a "large" stack allocation that can represented in 16-bits
@ -41,25 +44,34 @@ impl<'a> Writer<'a> {
/// The supported unwind codes for the x64 Windows ABI.
///
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// See: <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA is represented here.
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub(crate) enum UnwindCode {
PushRegister {
offset: u8,
instruction_offset: u8,
reg: u8,
},
SaveReg {
instruction_offset: u8,
reg: u8,
stack_offset: u32,
},
SaveXmm {
offset: u8,
instruction_offset: u8,
reg: u8,
stack_offset: u32,
},
StackAlloc {
offset: u8,
instruction_offset: u8,
size: u32,
},
SetFPReg {
instruction_offset: u8,
},
}
impl UnwindCode {
@ -68,37 +80,63 @@ impl UnwindCode {
PushNonvolatileRegister = 0,
LargeStackAlloc = 1,
SmallStackAlloc = 2,
SetFPReg = 3,
SaveNonVolatileRegister = 4,
SaveNonVolatileRegisterFar = 5,
SaveXmm128 = 8,
SaveXmm128Far = 9,
}
match self {
Self::PushRegister { offset, reg } => {
writer.write_u8(*offset);
Self::PushRegister {
instruction_offset,
reg,
} => {
writer.write_u8(*instruction_offset);
writer.write_u8((*reg << 4) | (UnwindOperation::PushNonvolatileRegister as u8));
}
Self::SaveXmm {
offset,
Self::SaveReg {
instruction_offset,
reg,
stack_offset,
}
| Self::SaveXmm {
instruction_offset,
reg,
stack_offset,
} => {
writer.write_u8(*offset);
let is_xmm = match self {
Self::SaveXmm { .. } => true,
_ => false,
};
let (op_small, op_large) = if is_xmm {
(UnwindOperation::SaveXmm128, UnwindOperation::SaveXmm128Far)
} else {
(
UnwindOperation::SaveNonVolatileRegister,
UnwindOperation::SaveNonVolatileRegisterFar,
)
};
writer.write_u8(*instruction_offset);
let scaled_stack_offset = stack_offset / 16;
if scaled_stack_offset <= core::u16::MAX as u32 {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
writer.write_u8((*reg << 4) | (op_small as u8));
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
} else {
writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
writer.write_u8((*reg << 4) | (op_large as u8));
writer.write_u16::<LittleEndian>(*stack_offset as u16);
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
}
}
Self::StackAlloc { offset, size } => {
Self::StackAlloc {
instruction_offset,
size,
} => {
// Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
assert!(*size >= 8);
assert!((*size % 8) == 0);
writer.write_u8(*offset);
writer.write_u8(*instruction_offset);
if *size <= SMALL_ALLOC_MAX_SIZE {
writer.write_u8(
((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
@ -111,7 +149,11 @@ impl UnwindCode {
writer.write_u32::<LittleEndian>(*size);
}
}
};
Self::SetFPReg { instruction_offset } => {
writer.write_u8(*instruction_offset);
writer.write_u8(UnwindOperation::SetFPReg as u8);
}
}
}
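A quick worked example (a sketch, not part of the patch) of the two-byte node produced for a small stack allocation, mirroring the `SmallStackAlloc` arm above:

fn small_stack_alloc_node(instruction_offset: u8, size: u32) -> [u8; 2] {
    const SMALL_STACK_ALLOC: u8 = 2; // UnwindOperation::SmallStackAlloc
    assert!(size >= 8 && size % 8 == 0 && size <= 128);
    // The operation-info nibble encodes (size / 8) - 1.
    [instruction_offset, ((((size - 8) / 8) as u8) << 4) | SMALL_STACK_ALLOC]
}

// e.g. a 40-byte allocation at prologue offset 4 yields [0x04, 0x42].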
fn node_count(&self) -> usize {
@ -125,7 +167,7 @@ impl UnwindCode {
3
}
}
Self::SaveXmm { stack_offset, .. } => {
Self::SaveXmm { stack_offset, .. } | Self::SaveReg { stack_offset, .. } => {
if *stack_offset <= core::u16::MAX as u32 {
2
} else {
@ -143,15 +185,15 @@ pub(crate) enum MappedRegister {
}
/// Maps UnwindInfo register to Windows x64 unwind data.
pub(crate) trait RegisterMapper {
/// Maps RegUnit.
fn map(reg: RegUnit) -> MappedRegister;
pub(crate) trait RegisterMapper<Reg> {
/// Maps a Reg to a Windows unwind register number.
fn map(reg: Reg) -> MappedRegister;
}
/// Represents Windows x64 unwind information.
///
/// For information about Windows x64 unwind info, see:
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct UnwindInfo {
@ -219,8 +261,11 @@ impl UnwindInfo {
.fold(0, |nodes, c| nodes + c.node_count())
}
pub(crate) fn build<MR: RegisterMapper>(
unwind: input::UnwindInfo<RegUnit>,
// TODO: remove `build()` below when old backend is removed. The new backend uses
// a simpler approach in `create_unwind_info_from_insts()` below.
pub(crate) fn build<Reg: PartialEq + Copy + std::fmt::Debug, MR: RegisterMapper<Reg>>(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Self> {
use crate::isa::unwind::input::UnwindCode as InputUnwindCode;
@ -237,7 +282,7 @@ impl UnwindInfo {
// `StackAlloc { size = word_size }`, `SaveRegister { stack_offset: 0 }`
// to the shorter `UnwindCode::PushRegister`.
let push_reg_sequence = if let Some(UnwindCode::StackAlloc {
offset: alloc_offset,
instruction_offset: alloc_offset,
size,
}) = unwind_codes.last()
{
@ -246,19 +291,21 @@ impl UnwindInfo {
false
};
if push_reg_sequence {
*unwind_codes.last_mut().unwrap() =
UnwindCode::PushRegister { offset, reg };
*unwind_codes.last_mut().unwrap() = UnwindCode::PushRegister {
instruction_offset: offset,
reg,
};
} else {
// TODO add `UnwindCode::SaveRegister` to handle multiple register
// pushes with single `UnwindCode::StackAlloc`.
return Err(CodegenError::Unsupported(
"Unsupported UnwindCode::PushRegister sequence".into(),
));
unwind_codes.push(UnwindCode::SaveReg {
instruction_offset: offset,
reg,
stack_offset: *stack_offset,
});
}
}
MappedRegister::Xmm(reg) => {
unwind_codes.push(UnwindCode::SaveXmm {
offset,
instruction_offset: offset,
reg,
stack_offset: *stack_offset,
});
@ -267,7 +314,7 @@ impl UnwindInfo {
}
InputUnwindCode::StackAlloc { size } => {
unwind_codes.push(UnwindCode::StackAlloc {
offset: ensure_unwind_offset(*offset)?,
instruction_offset: ensure_unwind_offset(*offset)?,
size: *size,
});
}
@ -285,6 +332,65 @@ impl UnwindInfo {
}
}
const UNWIND_RBP_REG: u8 = 5;
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
insts: &[(CodeOffset, UnwindInst)],
) -> CodegenResult<UnwindInfo> {
let mut unwind_codes = vec![];
let mut frame_register_offset = 0;
let mut max_unwind_offset = 0;
for &(instruction_offset, ref inst) in insts {
let instruction_offset = ensure_unwind_offset(instruction_offset)?;
match inst {
&UnwindInst::PushFrameRegs { .. } => {
unwind_codes.push(UnwindCode::PushRegister {
instruction_offset,
reg: UNWIND_RBP_REG,
});
}
&UnwindInst::DefineNewFrame {
offset_downward_to_clobbers,
..
} => {
frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?;
unwind_codes.push(UnwindCode::SetFPReg { instruction_offset });
}
&UnwindInst::SaveReg {
clobber_offset,
reg,
} => match MR::map(reg.to_reg()) {
MappedRegister::Int(reg) => {
unwind_codes.push(UnwindCode::SaveReg {
instruction_offset,
reg,
stack_offset: clobber_offset,
});
}
MappedRegister::Xmm(reg) => {
unwind_codes.push(UnwindCode::SaveXmm {
instruction_offset,
reg,
stack_offset: clobber_offset,
});
}
},
&UnwindInst::Aarch64SetPointerAuth { .. } => {
unreachable!("no aarch64 on x64");
}
}
max_unwind_offset = instruction_offset;
}
Ok(UnwindInfo {
flags: 0,
prologue_size: max_unwind_offset,
frame_register: Some(UNWIND_RBP_REG),
frame_register_offset,
unwind_codes,
})
}
fn ensure_unwind_offset(offset: u32) -> CodegenResult<u8> {
if offset > 255 {
warn!("function prologues cannot exceed 255 bytes in size for Windows x64");

View file

@ -1,9 +1,9 @@
//! Implementation of the standard x64 ABI.
use crate::ir::types::*;
use crate::ir::{self, types, MemFlags, TrapCode, Type};
use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
use crate::isa;
use crate::isa::{x64::inst::*, CallConv};
use crate::isa::{unwind::UnwindInst, x64::inst::*, CallConv};
use crate::machinst::abi_impl::*;
use crate::machinst::*;
use crate::settings;
@ -31,7 +31,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
regs::r14().to_real_reg(),
types::I64,
param.extension,
@ -40,7 +40,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
Some(ABIArg::reg(
regs::r10().to_real_reg(),
types::I64,
param.extension,
@ -50,7 +50,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -60,7 +60,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
Some(ABIArg::stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn compute_arg_locs(
call_conv: isa::CallConv,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
let is_baldrdash = call_conv.extends_baldrdash();
let is_fastcall = call_conv.extends_windows_fastcall();
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
let mut next_gpr = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
let mut next_param_idx = 0; // Fastcall cares about overall param index
let mut ret = vec![];
if args_or_rets == ArgsOrRets::Args && is_fastcall {
// Fastcall always reserves 32 bytes of shadow space corresponding to
// the four initial in-arg parameters.
//
// (See:
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
next_stack = 32;
}
if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
// Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
// caller TLS-register values, respectively.
@ -131,65 +143,120 @@ impl ABIMachineSpec for X64ABIMachineSpec {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::CallerTLS => {}
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
),
}
let intreg = in_int_reg(param.value_type);
let vecreg = in_vec_reg(param.value_type);
debug_assert!(intreg || vecreg);
debug_assert!(!(intreg && vecreg));
let (next_reg, candidate) = if intreg {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::I64)
.unwrap_or(true));
(&mut next_gpr, candidate)
} else {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::V128)
.unwrap_or(true));
(&mut next_vreg, candidate)
};
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(intreg);
ret.push(param);
} else if let Some(reg) = candidate {
ret.push(ABIArg::Reg(
reg.to_real_reg(),
param.value_type,
param.extension,
param.purpose,
));
*next_reg += 1;
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (param.value_type.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack as i64,
param.value_type,
param.extension,
param.purpose,
));
next_stack += size;
continue;
}
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
// Now assign ABIArgSlots for each register-sized part.
//
// Note that the handling of `i128` values is unique here:
//
// - If `enable_llvm_abi_extensions` is set in the flags, each
// `i128` is split into two `i64`s and assigned exactly as if it
// were two consecutive 64-bit args. This is consistent with LLVM's
// behavior, and is needed for some uses of Cranelift (e.g., the
// rustc backend).
//
// - Otherwise, both SysV and Fastcall specify behavior (use of
// vector register, a register pair, or passing by reference
// depending on the case), but for simplicity, we will just panic if
// an i128 type appears in a signature and the LLVM extensions flag
// is not set.
//
// For examples of how rustc compiles i128 args and return values on
// both SysV and Fastcall platforms, see:
// https://godbolt.org/z/PhG3ob
if param.value_type.bits() > 64
&& !param.value_type.is_vector()
&& !flags.enable_llvm_abi_extensions()
{
panic!(
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
);
}
let mut slots = vec![];
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let intreg = *rc == RegClass::I64;
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => {
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
}
ArgsOrRets::Rets => {
get_intreg_for_retval(&call_conv, next_gpr, next_param_idx)
}
}
} else {
match args_or_rets {
ArgsOrRets::Args => {
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
}
ArgsOrRets::Rets => {
get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx)
}
}
};
next_param_idx += 1;
if let Some(reg) = nextreg {
if intreg {
next_gpr += 1;
} else {
next_vreg += 1;
}
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg(),
ty: *reg_ty,
extension: param.extension,
});
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
let size = (reg_ty.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
slots.push(ABIArgSlot::Stack {
offset: next_stack as i64,
ty: *reg_ty,
extension: param.extension,
});
next_stack += size;
}
}
ret.push(ABIArg::Slots {
slots,
purpose: param.purpose,
});
}
if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
@ -198,15 +265,15 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
ret.push(ABIArg::Reg(
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
ret.push(ABIArg::reg(
reg.to_real_reg(),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
ret.push(ABIArg::Stack(
ret.push(ABIArg::stack(
next_stack as i64,
types::I64,
ir::ArgumentExtension::None,
@ -219,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
None
};
next_stack = (next_stack + 15) & !15;
next_stack = align_to(next_stack, 16);
// To avoid overflow issues, limit the arg/return size to something reasonable.
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
@ -288,13 +355,13 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::epilogue_placeholder()
}
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> {
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Self::I> {
let mut ret = SmallVec::new();
if from_reg != into_reg.to_reg() {
ret.push(Inst::gen_move(into_reg, from_reg, I64));
}
ret.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(imm),
into_reg,
@ -302,9 +369,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
ret
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> {
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
smallvec![
Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg),
Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(regs::rsp()), limit_reg),
Inst::TrapIf {
// NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
cc: CC::NBE,
@ -343,7 +410,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::store(ty, from_reg, mem)
}
fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> {
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
let (alu_op, amount) = if amount >= 0 {
(AluRmiROpcode::Add, amount)
} else {
@ -353,7 +420,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let amount = amount as u32;
smallvec![Inst::alu_rmi_r(
true,
OperandSize::Size64,
alu_op,
RegMemImm::imm(amount),
Writable::from_reg(regs::rsp()),
@ -366,71 +433,126 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
}
fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> {
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I> {
let r_rsp = regs::rsp();
let r_rbp = regs::rbp();
let w_rbp = Writable::from_reg(r_rbp);
let mut insts = SmallVec::new();
// `push %rbp`
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::push64(RegMemImm::reg(r_rbp)));
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// `mov %rsp, %rbp`
// RSP is now 0 % 16
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));
insts
}
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> {
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
// `mov %rbp, %rsp`
insts.push(Inst::mov_r_r(
true,
OperandSize::Size64,
regs::rbp(),
Writable::from_reg(regs::rsp()),
));
// `pop %rbp`
insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
insts
}
fn gen_probestack(frame_size: u32) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: vec![regs::rax()],
defs: vec![],
opcode: Opcode::Call,
});
insts
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
let mut insts = SmallVec::new();
// Find all clobbered registers that are callee-save. These are only I64
// registers (all XMM registers are caller-save) so we can compute the
// total size of the needed stack space easily.
// Find all clobbered registers that are callee-save.
let clobbered = get_callee_saves(&call_conv, clobbers);
let clobbered_size = 8 * clobbered.len() as u32;
let stack_size = clobbered_size + fixed_frame_storage_size;
// Align to 16 bytes.
let stack_size = (stack_size + 15) & !15;
// Adjust the stack pointer downward with one `sub rsp, IMM`
// instruction.
let clobbered_size = compute_clobber_size(&clobbered);
if flags.unwind_info() {
// Emit unwind info: start the frame. The frame (from unwind
// consumers' point of view) starts at clobbers, just below
// the FP and return address. Spill slots and stack slots are
// part of our actual frame but do not concern the unwinder.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobbered_size,
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// Adjust the stack pointer downward for clobbers and the function fixed
// frame (spillslots and storage slots).
let stack_size = fixed_frame_storage_size + clobbered_size;
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
));
}
// Store each clobbered register in order at offsets from RSP.
let mut cur_offset = 0;
// Store each clobbered register in order at offsets from RSP,
// placing them above the fixed frame slots.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.get_class() {
RegClass::I64 => {
insts.push(Inst::mov_r_m(
/* bytes = */ 8,
insts.push(Inst::store(
types::I64,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 8;
}
// No XMM regs are callee-save, so we do not need to implement
// this.
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::store(
types::I8X16,
r_reg.to_reg(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 16;
}
_ => unreachable!(),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: off - fixed_frame_storage_size,
reg: r_reg,
},
});
}
}
@ -441,17 +563,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
fixed_frame_storage_size: u32,
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
let clobbered = get_callee_saves(&call_conv, clobbers);
let stack_size = 8 * clobbered.len() as u32;
let stack_size = (stack_size + 15) & !15;
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered);
// Restore regs by loading from offsets of RSP.
let mut cur_offset = 0;
// Restore regs by loading from offsets of RSP. RSP will be
// returned to nominal-RSP at this point, so we can use the
// same offsets that we used when saving clobbers above.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered {
let rreg = reg.to_reg();
match rreg.get_class() {
@ -462,13 +584,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
));
cur_offset += 8;
}
_ => unimplemented!(),
RegClass::V128 => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::load(
types::I8X16,
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.to_reg()),
ExtKind::None,
));
cur_offset += 16;
}
_ => unreachable!(),
}
}
// Adjust RSP back upward.
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
true,
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
@ -531,6 +663,51 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
fn gen_memcpy(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
size: usize,
) -> SmallVec<[Self::I; 8]> {
// Baldrdash should not use struct args.
assert!(!call_conv.extends_baldrdash());
let mut insts = SmallVec::new();
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
// We need a register to load the address of `memcpy()` below and we
// don't have a lowering context to allocate a temp here; so just use a
// register we know we are free to mutate as part of this sequence
// (because it is clobbered by the call as per the ABI anyway).
let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap();
insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64));
insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64));
insts.extend(
Inst::gen_constant(
ValueRegs::one(Writable::from_reg(arg2)),
size as u128,
I64,
|_| panic!("tmp should not be needed"),
)
.into_iter(),
);
// We use an indirect call and a full LoadExtName because we do not have
// information about the libcall `RelocDistance` here, so we
// conservatively use the more flexible calling sequence.
insts.push(Inst::LoadExtName {
dst: Writable::from_reg(memcpy_addr),
name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
offset: 0,
});
insts.push(Inst::call_unknown(
RegMem::reg(memcpy_addr),
/* uses = */ vec![arg0, arg1, arg2],
/* defs = */ Self::get_regs_clobbered_by_call(call_conv),
Opcode::Call,
));
insts
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
@ -551,10 +728,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = vec![
// Systemv calling convention:
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
Writable::from_reg(regs::rsi()),
Writable::from_reg(regs::rdi()),
// intersection of Systemv and FastCall calling conventions:
// - GPR: all except RDI, RSI, RBX, RBP, R12 to R15.
// SysV adds RDI, RSI (FastCall makes these callee-saved).
Writable::from_reg(regs::rax()),
Writable::from_reg(regs::rcx()),
Writable::from_reg(regs::rdx()),
@ -562,25 +738,30 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::r9()),
Writable::from_reg(regs::r10()),
Writable::from_reg(regs::r11()),
// - XMM: all the registers!
// - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15).
Writable::from_reg(regs::xmm0()),
Writable::from_reg(regs::xmm1()),
Writable::from_reg(regs::xmm2()),
Writable::from_reg(regs::xmm3()),
Writable::from_reg(regs::xmm4()),
Writable::from_reg(regs::xmm5()),
Writable::from_reg(regs::xmm6()),
Writable::from_reg(regs::xmm7()),
Writable::from_reg(regs::xmm8()),
Writable::from_reg(regs::xmm9()),
Writable::from_reg(regs::xmm10()),
Writable::from_reg(regs::xmm11()),
Writable::from_reg(regs::xmm12()),
Writable::from_reg(regs::xmm13()),
Writable::from_reg(regs::xmm14()),
Writable::from_reg(regs::xmm15()),
];
if !call_conv_of_callee.extends_windows_fastcall() {
caller_saved.push(Writable::from_reg(regs::rsi()));
caller_saved.push(Writable::from_reg(regs::rdi()));
caller_saved.push(Writable::from_reg(regs::xmm6()));
caller_saved.push(Writable::from_reg(regs::xmm7()));
caller_saved.push(Writable::from_reg(regs::xmm8()));
caller_saved.push(Writable::from_reg(regs::xmm9()));
caller_saved.push(Writable::from_reg(regs::xmm10()));
caller_saved.push(Writable::from_reg(regs::xmm11()));
caller_saved.push(Writable::from_reg(regs::xmm12()));
caller_saved.push(Writable::from_reg(regs::xmm13()));
caller_saved.push(Writable::from_reg(regs::xmm14()));
caller_saved.push(Writable::from_reg(regs::xmm15()));
}
if call_conv_of_callee.extends_baldrdash() {
caller_saved.push(Writable::from_reg(regs::r12()));
caller_saved.push(Writable::from_reg(regs::r13()));
@ -591,6 +772,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
caller_saved
}
fn get_ext_mode(
call_conv: isa::CallConv,
specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
if call_conv.extends_baldrdash() {
// Baldrdash (SpiderMonkey) always extends args and return values to the full register.
specified
} else {
// No other supported ABI on x64 does so.
ir::ArgumentExtension::None
}
}
}
impl From<StackAMode> for SyntheticAmode {
@ -629,74 +823,67 @@ impl From<StackAMode> for SyntheticAmode {
}
}
fn in_int_reg(ty: types::Type) -> bool {
match ty {
types::I8
| types::I16
| types::I32
| types::I64
| types::B1
| types::B8
| types::B16
| types::B32
| types::B64
| types::R64 => true,
types::R32 => panic!("unexpected 32-bits refs on x64!"),
_ => false,
}
}
fn in_vec_reg(ty: types::Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ if ty.is_vector() => true,
_ => false,
}
}
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("int args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::rdi()),
1 => Some(regs::rsi()),
2 => Some(regs::rdx()),
3 => Some(regs::rcx()),
4 => Some(regs::r8()),
5 => Some(regs::r9()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::rdi()),
(1, false) => Some(regs::rsi()),
(2, false) => Some(regs::rdx()),
(3, false) => Some(regs::rcx()),
(4, false) => Some(regs::r8()),
(5, false) => Some(regs::r9()),
(0, true) => Some(regs::rcx()),
(1, true) => Some(regs::rdx()),
(2, true) => Some(regs::r8()),
(3, true) => Some(regs::r9()),
_ => None,
}
}
fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => {}
_ => panic!("float args only supported for SysV calling convention"),
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
};
match idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (floating-point) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::xmm0()),
(1, false) => Some(regs::xmm1()),
(2, false) => Some(regs::xmm2()),
(3, false) => Some(regs::xmm3()),
(4, false) => Some(regs::xmm4()),
(5, false) => Some(regs::xmm5()),
(6, false) => Some(regs::xmm6()),
(7, false) => Some(regs::xmm7()),
(0, true) => Some(regs::xmm0()),
(1, true) => Some(regs::xmm1()),
(2, true) => Some(regs::xmm2()),
(3, true) => Some(regs::xmm3()),
_ => None,
}
}
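To make the counting difference concrete, a sketch (not part of the patch) tracing a `(i64, f64, i64)` signature through the two helpers:

// SysV counts per register class (`idx`); fastcall counts absolute params (`arg_idx`).
assert_eq!(get_intreg_for_arg(&CallConv::SystemV, 0, 0), Some(regs::rdi()));
assert_eq!(get_fltreg_for_arg(&CallConv::SystemV, 0, 1), Some(regs::xmm0()));
assert_eq!(get_intreg_for_arg(&CallConv::SystemV, 1, 2), Some(regs::rsi()));
assert_eq!(get_intreg_for_arg(&CallConv::WindowsFastcall, 0, 0), Some(regs::rcx()));
assert_eq!(get_fltreg_for_arg(&CallConv::WindowsFastcall, 0, 1), Some(regs::xmm1()));
assert_eq!(get_intreg_for_arg(&CallConv::WindowsFastcall, 1, 2), Some(regs::r8()));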
fn get_intreg_for_retval_systemv(
fn get_intreg_for_retval(
call_conv: &CallConv,
intreg_idx: usize,
retval_idx: usize,
@ -714,11 +901,17 @@ fn get_intreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
fn get_fltreg_for_retval_systemv(
fn get_fltreg_for_retval(
call_conv: &CallConv,
fltreg_idx: usize,
retval_idx: usize,
@ -736,7 +929,12 @@ fn get_fltreg_for_retval_systemv(
None
}
}
CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::WindowsFastcall => match fltreg_idx {
0 => Some(regs::xmm0()),
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
@ -769,6 +967,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool {
}
}
fn is_callee_save_fastcall(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
RegClass::I64 => match r.get_hw_encoding() as u8 {
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => match r.get_hw_encoding() as u8 {
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
_ => false,
},
_ => panic!("Unknown register class: {:?}", r.get_class()),
}
}
fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
@ -784,11 +997,33 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => todo!("windows fastcall"),
CallConv::WindowsFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| r.to_reg().get_index());
regs
}
fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().get_class() {
RegClass::I64 => {
clobbered_size += 8;
}
RegClass::V128 => {
clobbered_size = align_to(clobbered_size, 16);
clobbered_size += 16;
}
_ => unreachable!(),
}
}
align_to(clobbered_size, 16)
}
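`align_to` here is assumed to be the usual round-up-to-alignment helper; a sketch of that assumed behavior plus a worked example for the function above:

// Assumed behavior of `align_to` (`align` must be a power of two).
fn align_to_sketch(x: u32, align: u32) -> u32 {
    debug_assert!(align.is_power_of_two());
    (x + align - 1) & !(align - 1)
}

// Worked example for clobbers = [rbx (I64), xmm6 (V128)]:
//   rbx:  clobbered_size = 8
//   xmm6: align_to(8, 16) = 16, then += 16 -> 32
//   final align_to(32, 16) = 32 bytes of clobber area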

View file

@ -3,7 +3,8 @@
use super::regs::{self, show_ireg_sized};
use super::EmitState;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::MemFlags;
use crate::ir::{MemFlags, Type};
use crate::isa::x64::inst::Inst;
use crate::machinst::*;
use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
@ -144,7 +145,7 @@ impl PrettyPrint for Amode {
/// A Memory Address. These denote a 64-bit value only.
/// Used for usual addressing modes as well as addressing modes used during compilation, when the
/// moving SP offset is not known.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum SyntheticAmode {
/// A real amode.
Real(Amode),
@ -152,6 +153,9 @@ pub enum SyntheticAmode {
/// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
/// within the function.
NominalSPOffset { simm32: u32 },
/// A virtual offset to a constant that will be emitted in the constant section of the buffer.
ConstantOffset(VCodeConstant),
}
impl SyntheticAmode {
@ -166,6 +170,7 @@ impl SyntheticAmode {
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do; the base is SP and isn't involved in regalloc.
}
SyntheticAmode::ConstantOffset(_) => {}
}
}
@ -175,10 +180,11 @@ impl SyntheticAmode {
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do.
}
SyntheticAmode::ConstantOffset(_) => {}
}
}
pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer<Inst>) -> Amode {
match self {
SyntheticAmode::Real(addr) => addr.clone(),
SyntheticAmode::NominalSPOffset { simm32 } => {
@ -190,6 +196,9 @@ impl SyntheticAmode {
);
Amode::imm_reg(off as u32, regs::rsp())
}
SyntheticAmode::ConstantOffset(c) => {
Amode::rip_relative(buffer.get_label_for_constant(*c))
}
}
}
}
@ -207,6 +216,7 @@ impl PrettyPrint for SyntheticAmode {
SyntheticAmode::NominalSPOffset { simm32 } => {
format!("rsp({} + virtual offset)", *simm32 as i32)
}
SyntheticAmode::ConstantOffset(c) => format!("const({:?})", c),
}
}
}
@ -276,7 +286,7 @@ impl PrettyPrintSized for RegMemImm {
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32, 64, or 128 bit value.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum RegMem {
Reg { reg: Reg },
Mem { addr: SyntheticAmode },
@ -336,23 +346,35 @@ impl PrettyPrintSized for RegMem {
#[derive(Copy, Clone, PartialEq)]
pub enum AluRmiROpcode {
Add,
Adc,
Sub,
Sbb,
And,
Or,
Xor,
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
Mul,
/// 8-bit form of And. Handled separately as we don't have full 8-bit op
/// support (we just use wider instructions). Used only with some sequences
/// with SETcc.
And8,
/// 8-bit form of Or.
Or8,
}
impl fmt::Debug for AluRmiROpcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
AluRmiROpcode::Add => "add",
AluRmiROpcode::Adc => "adc",
AluRmiROpcode::Sub => "sub",
AluRmiROpcode::Sbb => "sbb",
AluRmiROpcode::And => "and",
AluRmiROpcode::Or => "or",
AluRmiROpcode::Xor => "xor",
AluRmiROpcode::Mul => "imul",
AluRmiROpcode::And8 => "and",
AluRmiROpcode::Or8 => "or",
};
write!(fmt, "{}", name)
}
@ -364,12 +386,39 @@ impl fmt::Display for AluRmiROpcode {
}
}
impl AluRmiROpcode {
/// Is this a special-cased 8-bit ALU op?
pub fn is_8bit(self) -> bool {
match self {
AluRmiROpcode::And8 | AluRmiROpcode::Or8 => true,
_ => false,
}
}
}
#[derive(Clone, PartialEq)]
pub enum UnaryRmROpcode {
/// Bit-scan reverse.
Bsr,
/// Bit-scan forward.
Bsf,
/// Counts leading zeroes (Leading Zero CouNT).
Lzcnt,
/// Counts trailing zeroes (Trailing Zero CouNT).
Tzcnt,
/// Counts the number of ones (POPulation CouNT).
Popcnt,
}
impl UnaryRmROpcode {
pub(crate) fn available_from(&self) -> Option<InstructionSet> {
match self {
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => None,
UnaryRmROpcode::Lzcnt => Some(InstructionSet::Lzcnt),
UnaryRmROpcode::Tzcnt => Some(InstructionSet::BMI1),
UnaryRmROpcode::Popcnt => Some(InstructionSet::Popcnt),
}
}
}
impl fmt::Debug for UnaryRmROpcode {
@ -377,6 +426,9 @@ impl fmt::Debug for UnaryRmROpcode {
match self {
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
UnaryRmROpcode::Tzcnt => write!(fmt, "tzcnt"),
UnaryRmROpcode::Popcnt => write!(fmt, "popcnt"),
}
}
}
@ -387,16 +439,30 @@ impl fmt::Display for UnaryRmROpcode {
}
}
#[derive(Clone, Copy, PartialEq)]
pub enum CmpOpcode {
/// CMP instruction: compute `a - b` and set flags from result.
Cmp,
/// TEST instruction: compute `a & b` and set flags from result.
Test,
}
pub(crate) enum InstructionSet {
SSE,
SSE2,
SSSE3,
SSE41,
SSE42,
Popcnt,
Lzcnt,
BMI1,
#[allow(dead_code)] // never constructed (yet).
BMI2,
}
/// Some SSE operations requiring 2 operands r/m and r.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)] // some variants here aren't used just yet
pub enum SseOpcode {
Addps,
Addpd,
@ -406,6 +472,7 @@ pub enum SseOpcode {
Andpd,
Andnps,
Andnpd,
Blendvpd,
Comiss,
Comisd,
Cmpps,
@ -413,6 +480,7 @@ pub enum SseOpcode {
Cmpss,
Cmpsd,
Cvtdq2ps,
Cvtdq2pd,
Cvtsd2ss,
Cvtsd2si,
Cvtsi2ss,
@ -457,7 +525,10 @@ pub enum SseOpcode {
Pabsb,
Pabsw,
Pabsd,
Packssdw,
Packsswb,
Packusdw,
Packuswb,
Paddb,
Paddd,
Paddq,
@ -466,6 +537,7 @@ pub enum SseOpcode {
Paddsw,
Paddusb,
Paddusw,
Palignr,
Pand,
Pandn,
Pavgb,
@ -484,6 +556,7 @@ pub enum SseOpcode {
Pinsrb,
Pinsrw,
Pinsrd,
Pmaddwd,
Pmaxsb,
Pmaxsw,
Pmaxsd,
@ -497,6 +570,18 @@ pub enum SseOpcode {
Pminuw,
Pminud,
Pmovmskb,
Pmovsxbd,
Pmovsxbw,
Pmovsxbq,
Pmovsxwd,
Pmovsxwq,
Pmovsxdq,
Pmovzxbd,
Pmovzxbw,
Pmovzxbq,
Pmovzxwd,
Pmovzxwq,
Pmovzxdq,
Pmulld,
Pmullw,
Pmuludq,
@ -520,8 +605,12 @@ pub enum SseOpcode {
Psubusb,
Psubusw,
Ptest,
Punpckhbw,
Punpcklbw,
Pxor,
Rcpss,
Roundps,
Roundpd,
Roundss,
Roundsd,
Rsqrtss,
@ -585,6 +674,7 @@ impl SseOpcode {
| SseOpcode::Cmpsd
| SseOpcode::Comisd
| SseOpcode::Cvtdq2ps
| SseOpcode::Cvtdq2pd
| SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd
@ -608,7 +698,9 @@ impl SseOpcode {
| SseOpcode::Mulpd
| SseOpcode::Mulsd
| SseOpcode::Orpd
| SseOpcode::Packssdw
| SseOpcode::Packsswb
| SseOpcode::Packuswb
| SseOpcode::Paddb
| SseOpcode::Paddd
| SseOpcode::Paddq
@ -629,6 +721,7 @@ impl SseOpcode {
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw
| SseOpcode::Pinsrw
| SseOpcode::Pmaddwd
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
| SseOpcode::Pminsw
@ -654,6 +747,8 @@ impl SseOpcode {
| SseOpcode::Psubsw
| SseOpcode::Psubusb
| SseOpcode::Psubusw
| SseOpcode::Punpckhbw
| SseOpcode::Punpcklbw
| SseOpcode::Pxor
| SseOpcode::Sqrtpd
| SseOpcode::Sqrtsd
@ -662,9 +757,15 @@ impl SseOpcode {
| SseOpcode::Ucomisd
| SseOpcode::Xorpd => SSE2,
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
SseOpcode::Pabsb
| SseOpcode::Pabsw
| SseOpcode::Pabsd
| SseOpcode::Palignr
| SseOpcode::Pshufb => SSSE3,
SseOpcode::Insertps
SseOpcode::Blendvpd
| SseOpcode::Insertps
| SseOpcode::Packusdw
| SseOpcode::Pcmpeqq
| SseOpcode::Pextrb
| SseOpcode::Pextrd
@ -678,8 +779,22 @@ impl SseOpcode {
| SseOpcode::Pminsd
| SseOpcode::Pminuw
| SseOpcode::Pminud
| SseOpcode::Pmovsxbd
| SseOpcode::Pmovsxbw
| SseOpcode::Pmovsxbq
| SseOpcode::Pmovsxwd
| SseOpcode::Pmovsxwq
| SseOpcode::Pmovsxdq
| SseOpcode::Pmovzxbd
| SseOpcode::Pmovzxbw
| SseOpcode::Pmovzxbq
| SseOpcode::Pmovzxwd
| SseOpcode::Pmovzxwq
| SseOpcode::Pmovzxdq
| SseOpcode::Pmulld
| SseOpcode::Ptest
| SseOpcode::Roundps
| SseOpcode::Roundpd
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
@ -707,6 +822,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Andps => "andps",
SseOpcode::Andnps => "andnps",
SseOpcode::Andnpd => "andnpd",
SseOpcode::Blendvpd => "blendvpd",
SseOpcode::Cmpps => "cmpps",
SseOpcode::Cmppd => "cmppd",
SseOpcode::Cmpss => "cmpss",
@ -714,6 +830,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd",
SseOpcode::Cvtdq2ps => "cvtdq2ps",
SseOpcode::Cvtdq2pd => "cvtdq2pd",
SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss",
@ -758,7 +875,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pabsb => "pabsb",
SseOpcode::Pabsw => "pabsw",
SseOpcode::Pabsd => "pabsd",
SseOpcode::Packssdw => "packssdw",
SseOpcode::Packsswb => "packsswb",
SseOpcode::Packusdw => "packusdw",
SseOpcode::Packuswb => "packuswb",
SseOpcode::Paddb => "paddb",
SseOpcode::Paddd => "paddd",
SseOpcode::Paddq => "paddq",
@ -767,6 +887,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddsw => "paddsw",
SseOpcode::Paddusb => "paddusb",
SseOpcode::Paddusw => "paddusw",
SseOpcode::Palignr => "palignr",
SseOpcode::Pand => "pand",
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb",
@ -785,6 +906,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd",
SseOpcode::Pmaddwd => "pmaddwd",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
SseOpcode::Pmaxsd => "pmaxsd",
@ -798,6 +920,18 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pminuw => "pminuw",
SseOpcode::Pminud => "pminud",
SseOpcode::Pmovmskb => "pmovmskb",
SseOpcode::Pmovsxbd => "pmovsxbd",
SseOpcode::Pmovsxbw => "pmovsxbw",
SseOpcode::Pmovsxbq => "pmovsxbq",
SseOpcode::Pmovsxwd => "pmovsxwd",
SseOpcode::Pmovsxwq => "pmovsxwq",
SseOpcode::Pmovsxdq => "pmovsxdq",
SseOpcode::Pmovzxbd => "pmovzxbd",
SseOpcode::Pmovzxbw => "pmovzxbw",
SseOpcode::Pmovzxbq => "pmovzxbq",
SseOpcode::Pmovzxwd => "pmovzxwd",
SseOpcode::Pmovzxwq => "pmovzxwq",
SseOpcode::Pmovzxdq => "pmovzxdq",
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",
@ -821,8 +955,12 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Psubusb => "psubusb",
SseOpcode::Psubusw => "psubusw",
SseOpcode::Ptest => "ptest",
SseOpcode::Punpckhbw => "punpckhbw",
SseOpcode::Punpcklbw => "punpcklbw",
SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundps => "roundps",
SseOpcode::Roundpd => "roundpd",
SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd",
SseOpcode::Rsqrtss => "rsqrtss",
@ -926,7 +1064,7 @@ impl fmt::Display for ExtMode {
}
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
#[derive(Clone)]
#[derive(Clone, Copy)]
pub enum ShiftKind {
ShiftLeft,
/// Inserts zeros in the most significant bits.
@ -1171,9 +1309,30 @@ impl From<FloatCC> for FcmpImm {
}
}
/// Encode the rounding modes used as part of the Rounding Control field.
/// Note: these rounding immediates only consider the rounding control field
/// (i.e. the rounding mode), which only takes up the first two bits when encoded.
/// However, the rounding immediate that this field helps make up also includes
/// bits 3 and 4, which define the rounding select and precision mask respectively.
/// Those two bits are not defined here and are implicitly set to zero when encoded.
pub(crate) enum RoundImm {
RoundNearest = 0x00,
RoundDown = 0x01,
RoundUp = 0x02,
RoundZero = 0x03,
}
impl RoundImm {
pub(crate) fn encode(self) -> u8 {
self as u8
}
}
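An illustrative use (not in the patch): only the low two bits of the 8-bit rounding immediate come from this field, and the higher control bits stay at their zero defaults:

let imm: u8 = RoundImm::RoundDown.encode();
assert_eq!(imm, 0x01);
// RoundNearest -> 0x00, RoundUp -> 0x02, RoundZero -> 0x03; higher bits stay 0.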
/// An operand's size in bits.
#[derive(Clone, Copy, PartialEq)]
pub enum OperandSize {
Size8,
Size16,
Size32,
Size64,
}
@ -1181,24 +1340,36 @@ pub enum OperandSize {
impl OperandSize {
pub(crate) fn from_bytes(num_bytes: u32) -> Self {
match num_bytes {
1 | 2 | 4 => OperandSize::Size32,
1 => OperandSize::Size8,
2 => OperandSize::Size16,
4 => OperandSize::Size32,
8 => OperandSize::Size64,
_ => unreachable!(),
_ => unreachable!("Invalid OperandSize: {}", num_bytes),
}
}
// Computes the OperandSize for a given type.
// For vectors, the OperandSize of the lanes is returned.
pub(crate) fn from_ty(ty: Type) -> Self {
Self::from_bytes(ty.lane_type().bytes())
}
// Check that the value of self is one of the allowed sizes.
pub(crate) fn is_one_of(&self, sizes: &[Self]) -> bool {
sizes.iter().any(|val| *self == *val)
}
pub(crate) fn to_bytes(&self) -> u8 {
match self {
Self::Size8 => 1,
Self::Size16 => 2,
Self::Size32 => 4,
Self::Size64 => 8,
}
}
pub(crate) fn to_bits(&self) -> u8 {
match self {
Self::Size32 => 32,
Self::Size64 => 64,
}
self.to_bytes() * 8
}
}
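A few illustrative checks of the new helpers (a sketch, assuming the `Type` constants from `crate::ir::types`); note that for vectors the lane size is what matters:

use crate::ir::types;
assert!(OperandSize::from_ty(types::I8X16) == OperandSize::Size8); // lane type is I8
assert!(OperandSize::from_ty(types::F64) == OperandSize::Size64);
assert_eq!(OperandSize::Size16.to_bytes(), 2);
assert_eq!(OperandSize::Size16.to_bits(), 16);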

The diff between the files is not shown because of its large size.

The diff between the files is not shown because of its large size.

View file

@ -0,0 +1 @@
pub mod rex;

View file

@ -0,0 +1,453 @@
//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
//!
//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
//! means "hardware register encoding number".
use crate::{
ir::TrapCode,
isa::x64::inst::{
args::{Amode, OperandSize},
regs, EmitInfo, EmitState, Inst, LabelUse,
},
machinst::{MachBuffer, MachInstEmitInfo},
};
use regalloc::{Reg, RegClass};
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
let xs = (x as i32) as i64;
xs == ((xs << 56) >> 56)
}
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
let xs = x as i32;
xs == ((xs << 24) >> 24)
}
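A couple of illustrative cases for these predicates:

assert!(low8_will_sign_extend_to_32(0x7F)); // 127 fits in a sign-extended imm8
assert!(!low8_will_sign_extend_to_32(0x80)); // 128 does not
assert!(low8_will_sign_extend_to_32(0xFFFF_FFFF)); // -1 fits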
#[inline(always)]
pub(crate) fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
debug_assert!(m0d < 4);
debug_assert!(enc_reg_g < 8);
debug_assert!(rm_e < 8);
((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
}
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
debug_assert!(shift < 4);
debug_assert!(enc_index < 8);
debug_assert!(enc_base < 8);
((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
}
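A worked encoding as a sketch: the register-direct form of `add rbx, rax` (opcode `01 /r`, reg = rax = 0, r/m = rbx = 3) uses mod = 0b11, and a SIB byte for an address like `[rax + rcx*4]` uses scale = 2:

assert_eq!(encode_modrm(0b11, /* rax */ 0, /* rbx */ 3), 0xC3);
assert_eq!(encode_sib(/* scale 4 => */ 2, /* rcx */ 1, /* rax */ 0), 0x88);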
/// Get the encoding number of a GPR.
#[inline(always)]
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert_eq!(reg.get_class(), RegClass::I64);
reg.get_hw_encoding()
}
/// Get the encoding number of any register.
#[inline(always)]
pub(crate) fn reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
reg.get_hw_encoding()
}
/// A small bit field to record a REX prefix specification:
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct RexFlags(u8);
impl RexFlags {
/// The default: the REX.W bit is set and the prefix is emitted only if needed.
#[inline(always)]
pub(crate) fn set_w() -> Self {
Self(0)
}
/// Creates a new RexFlags for which the REX.W bit will be cleared.
#[inline(always)]
pub(crate) fn clear_w() -> Self {
Self(1)
}
#[inline(always)]
pub(crate) fn always_emit(&mut self) -> &mut Self {
self.0 = self.0 | 2;
self
}
#[inline(always)]
pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
let enc_reg = int_reg_enc(reg);
if enc_reg >= 4 && enc_reg <= 7 {
self.always_emit();
}
self
}
#[inline(always)]
pub(crate) fn must_clear_w(&self) -> bool {
(self.0 & 1) != 0
}
#[inline(always)]
pub(crate) fn must_always_emit(&self) -> bool {
(self.0 & 2) != 0
}
#[inline(always)]
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
let w = if self.must_clear_w() { 0 } else { 1 };
let r = (enc_g >> 3) & 1;
let x = 0;
let b = (enc_e >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || self.must_always_emit() {
sink.put1(rex);
}
}
#[inline(always)]
pub fn emit_three_op(
&self,
sink: &mut MachBuffer<Inst>,
enc_g: u8,
enc_index: u8,
enc_base: u8,
) {
let w = if self.must_clear_w() { 0 } else { 1 };
let r = (enc_g >> 3) & 1;
let x = (enc_index >> 3) & 1;
let b = (enc_base >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || self.must_always_emit() {
sink.put1(rex);
}
}
}
/// Generate the proper Rex flags for the given operand size.
impl From<OperandSize> for RexFlags {
fn from(size: OperandSize) -> Self {
match size {
OperandSize::Size64 => RexFlags::set_w(),
_ => RexFlags::clear_w(),
}
}
}
/// Generate Rex flags for an OperandSize/register tuple.
impl From<(OperandSize, Reg)> for RexFlags {
fn from((size, reg): (OperandSize, Reg)) -> Self {
let mut rex = RexFlags::from(size);
if size == OperandSize::Size8 {
rex.always_emit_if_8bit_needed(reg);
}
rex
}
}
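Concretely, a sketch of the byte arithmetic performed by `emit_two_op` above: for 64-bit `add rbx, rax` the fields are W=1, R=0 (rax), X=0, B=0 (rbx), giving the prefix 0x48 and the full instruction `48 01 C3`:

fn rex_byte_sketch(w: u8, enc_g: u8, enc_index: u8, enc_e: u8) -> u8 {
    0x40 | (w << 3) | (((enc_g >> 3) & 1) << 2) | (((enc_index >> 3) & 1) << 1) | ((enc_e >> 3) & 1)
}

assert_eq!(rex_byte_sketch(1, /* rax */ 0, 0, /* rbx */ 3), 0x48);
assert_eq!(rex_byte_sketch(1, /* r15 */ 15, 0, /* r12 */ 12), 0x4D);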
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
/// covers only the small set of possibilities that we actually need.
pub(crate) enum LegacyPrefixes {
/// No prefix bytes.
None,
/// Operand Size Override -- here, denoting "16-bit operation".
_66,
/// The Lock prefix.
_F0,
/// Operand size override and Lock.
_66F0,
/// REPNE, but no specific meaning here -- is just an opcode extension.
_F2,
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
_F3,
/// Operand size override and same effect as F3.
_66F3,
}
impl LegacyPrefixes {
#[inline(always)]
pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
match self {
LegacyPrefixes::_66 => sink.put1(0x66),
LegacyPrefixes::_F0 => sink.put1(0xF0),
LegacyPrefixes::_66F0 => {
// I don't think the order matters, but in any case, this is the same order that
// the GNU assembler uses.
sink.put1(0x66);
sink.put1(0xF0);
}
LegacyPrefixes::_F2 => sink.put1(0xF2),
LegacyPrefixes::_F3 => sink.put1(0xF3),
LegacyPrefixes::_66F3 => {
sink.put1(0x66);
sink.put1(0xF3);
}
LegacyPrefixes::None => (),
}
}
}
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
/// create and emit:
/// - first the legacy prefixes, if any
/// - then the REX prefix, if needed
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
/// - then the MOD/RM byte,
/// - then optionally, a SIB byte,
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
///
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
/// instructions will require their own emitter functions.
///
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
///
/// The opcodes are written in big-endian order for the convenience of callers. For example, if the opcode
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
/// 0xF3_0F_27 and `num_opcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
pub(crate) fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
mem_e: &Amode,
rex: RexFlags,
) {
// General comment for this function: the registers in `mem_e` must be
// 64-bit integer registers, because they are part of an address
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
let can_trap = mem_e.can_trap();
if can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
prefixes.emit(sink);
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_RBP
&& enc_e != regs::ENC_R12
&& enc_e != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
sink.put1(encode_modrm(0, enc_g & 7, 4));
sink.put1(0x24);
} else if low8_will_sign_extend_to_32(*simm32)
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_R12
{
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
sink.put4(*simm32);
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
&& low8_will_sign_extend_to_32(*simm32)
{
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
// No test case yet exercises the RSP variant of this arm.
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("ImmReg");
}
}
Amode::ImmRegRegShift {
simm32,
base: reg_base,
index: reg_index,
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
// The rex byte.
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates.
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put1(*simm32 as u8);
} else if enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put4(*simm32);
} else {
panic!("ImmRegRegShift");
}
}
Amode::RipRelative { ref target } => {
// First, the REX byte, with REX.B = 0.
rex.emit_two_op(sink, enc_g, 0);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// RIP-relative is mod=00, rm=101.
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
sink.put4(0);
}
}
}
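// Illustrative sketch (an editor's addition, not part of the crate): the
// big-endian `opcodes` packing convention documented above, demonstrated
// standalone with a plain Vec<u8> standing in for MachBuffer.
#[cfg(test)]
mod opcode_packing_sketch {
    fn emit_opcodes(sink: &mut Vec<u8>, opcodes: u32, mut num_opcodes: usize) {
        while num_opcodes > 0 {
            num_opcodes -= 1;
            // Pull bytes off the most significant end first.
            sink.push(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
        }
    }

    #[test]
    fn packs_f3_0f_27() {
        let mut sink = Vec::new();
        // F3 0F 27 is passed as `opcodes == 0xF3_0F_27`, `num_opcodes == 3`.
        emit_opcodes(&mut sink, 0xF3_0F_27, 3);
        assert_eq!(sink, vec![0xF3, 0x0F, 0x27]);
    }
}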
/// This is the core 'emit' function for instructions that do not reference memory.
///
/// This is conceptually the same as `emit_std_enc_mem` above, except it is for the case where
/// the E operand is a register rather than memory. Hence it is much simpler.
pub(crate) fn emit_std_enc_enc(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
enc_e: u8,
rex: RexFlags,
) {
// EncG and EncE can be derived from registers of any class, and they
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
// The legacy prefixes.
prefixes.emit(sink);
// The rex byte.
rex.emit_two_op(sink, enc_g, enc_e);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm byte. The instruction we're generating doesn't access
// memory, so there is no SIB byte or immediate -- we're done.
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
}
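// Illustrative sketch (an editor's addition): the standard x86-64 byte
// layouts that the `encode_modrm` / `encode_sib` calls above are assumed to
// produce. The helper bodies here are stand-ins, not the crate's definitions.
#[cfg(test)]
mod modrm_sib_sketch {
    // ModRM: mod[7:6] | reg[5:3] | rm[2:0]
    fn encode_modrm(m0d: u8, reg_g: u8, rm_e: u8) -> u8 {
        ((m0d & 3) << 6) | ((reg_g & 7) << 3) | (rm_e & 7)
    }

    // SIB: scale[7:6] | index[5:3] | base[2:0]
    fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
        ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
    }

    #[test]
    fn register_direct_and_sib() {
        // mod == 3 is register-direct, as in emit_std_enc_enc above:
        // reg rax (0) operating on reg rcx (1) packs to 0xC1.
        assert_eq!(encode_modrm(3, 0, 1), 0xC1);
        // Scale-by-8 (shift == 3), index rax (0), base rcx (1) also packs to 0xC1.
        assert_eq!(encode_sib(3, 0, 1), 0xC1);
    }
}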
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
pub(crate) fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
mem_e: &Amode,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
emit_std_enc_mem(
sink,
state,
info,
prefixes,
opcodes,
num_opcodes,
enc_g,
mem_e,
rex,
);
}
pub(crate) fn emit_std_reg_reg(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
reg_e: Reg,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
let enc_e = reg_enc(reg_e);
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
}
/// Write a suitable number of bytes of the immediate to the sink. Note that the 8-byte case
/// emits only four bytes: x86-64 instructions carry at most a 32-bit immediate, which the CPU
/// sign-extends to 64 bits.
pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
1 => sink.put1(simm32 as u8),
_ => unreachable!(),
}
}
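// Illustrative note (an editor's addition): a standalone check of the
// sign-extension equivalence that lets the size-8 arm above emit four bytes.
#[cfg(test)]
mod imm32_sign_extension_sketch {
    #[test]
    fn imm32_widens_to_imm64() {
        let simm32: u32 = 0xFFFF_FFFF; // the four bytes that would be emitted
        let widened = simm32 as i32 as i64; // what the CPU computes from them
        assert_eq!(widened, -1i64); // same value as a full 64-bit -1
    }
}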

Diff not shown because of its large size.

View file

@ -1,14 +1,20 @@
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//! These are ordered by sequence number, as required in the Universe.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
//! aware of the cost of clobber-save/restore code.
//!
//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
//! us to have to save them (as we are the caller) frequently. However, the register allocator
//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
//! caller-saved (i.e., callee-clobbered) registers.
//!
//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
//! ourselves have to save) and this is balanced against the RA's pressure in the other direction
//! at callsites.
use crate::settings;
use alloc::vec::Vec;
@ -17,11 +23,20 @@ use regalloc::{
};
use std::string::String;
// Hardware encodings for a few registers.
// Hardware encodings (note the special rax, rcx, rdx, rbx order).
pub const ENC_RAX: u8 = 0;
pub const ENC_RCX: u8 = 1;
pub const ENC_RDX: u8 = 2;
pub const ENC_RBX: u8 = 3;
pub const ENC_RSP: u8 = 4;
pub const ENC_RBP: u8 = 5;
pub const ENC_RSI: u8 = 6;
pub const ENC_RDI: u8 = 7;
pub const ENC_R8: u8 = 8;
pub const ENC_R9: u8 = 9;
pub const ENC_R10: u8 = 10;
pub const ENC_R11: u8 = 11;
pub const ENC_R12: u8 = 12;
pub const ENC_R13: u8 = 13;
pub const ENC_R14: u8 = 14;
@ -31,44 +46,44 @@ fn gpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::I64, enc, index)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 16)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 17)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 18)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 19)
}
pub(crate) fn rsi() -> Reg {
gpr(6, 20)
gpr(ENC_RSI, 16)
}
pub(crate) fn rdi() -> Reg {
gpr(7, 21)
gpr(ENC_RDI, 17)
}
pub(crate) fn rax() -> Reg {
gpr(0, 22)
gpr(ENC_RAX, 18)
}
pub(crate) fn rcx() -> Reg {
gpr(1, 23)
gpr(ENC_RCX, 19)
}
pub(crate) fn rdx() -> Reg {
gpr(2, 24)
gpr(ENC_RDX, 20)
}
pub(crate) fn r8() -> Reg {
gpr(8, 25)
gpr(ENC_R8, 21)
}
pub(crate) fn r9() -> Reg {
gpr(9, 26)
gpr(ENC_R9, 22)
}
pub(crate) fn r10() -> Reg {
gpr(10, 27)
gpr(ENC_R10, 23)
}
pub(crate) fn r11() -> Reg {
gpr(11, 28)
gpr(ENC_R11, 24)
}
pub(crate) fn r12() -> Reg {
gpr(ENC_R12, 25)
}
pub(crate) fn r13() -> Reg {
gpr(ENC_R13, 26)
}
pub(crate) fn r14() -> Reg {
gpr(ENC_R14, 27)
}
pub(crate) fn rbx() -> Reg {
gpr(ENC_RBX, 28)
}
pub(crate) fn r15() -> Reg {
@ -176,13 +191,6 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
// Integer regs.
let first_gpr = regs.len();
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Caller-saved, in the SystemV x86_64 ABI.
regs.push((rsi().to_real_reg(), "%rsi".into()));
regs.push((rdi().to_real_reg(), "%rdi".into()));
@ -194,6 +202,13 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
regs.push((r10().to_real_reg(), "%r10".into()));
regs.push((r11().to_real_reg(), "%r11".into()));
// Callee-saved, in the SystemV x86_64 ABI.
regs.push((r12().to_real_reg(), "%r12".into()));
regs.push((r13().to_real_reg(), "%r13".into()));
regs.push((r14().to_real_reg(), "%r14".into()));
regs.push((rbx().to_real_reg(), "%rbx".into()));
// Other regs, not available to the allocator.
debug_assert_eq!(r15(), pinned_reg());
let allocable = if use_pinned_reg {

View file

@ -1,125 +1,5 @@
use crate::isa::unwind::input::UnwindInfo;
use crate::isa::x64::inst::{
args::{AluRmiROpcode, Amode, RegMemImm, SyntheticAmode},
regs, Inst,
};
use crate::machinst::{UnwindInfoContext, UnwindInfoGenerator};
use crate::result::CodegenResult;
use alloc::vec::Vec;
use regalloc::Reg;
#[cfg(feature = "unwind")]
pub(crate) mod systemv;
pub struct X64UnwindInfo;
impl UnwindInfoGenerator<Inst> for X64UnwindInfo {
fn create_unwind_info(
context: UnwindInfoContext<Inst>,
) -> CodegenResult<Option<UnwindInfo<Reg>>> {
use crate::isa::unwind::input::{self, UnwindCode};
let mut codes = Vec::new();
const WORD_SIZE: u8 = 8;
for i in context.prologue.clone() {
let i = i as usize;
let inst = &context.insts[i];
let offset = context.insts_layout[i];
match inst {
Inst::Push64 {
src: RegMemImm::Reg { reg },
} => {
codes.push((
offset,
UnwindCode::StackAlloc {
size: WORD_SIZE.into(),
},
));
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *reg,
stack_offset: 0,
},
));
}
Inst::MovRR { src, dst, .. } => {
if *src == regs::rsp() {
codes.push((offset, UnwindCode::SetFramePointer { reg: dst.to_reg() }));
}
}
Inst::AluRmiR {
is_64: true,
op: AluRmiROpcode::Sub,
src: RegMemImm::Imm { simm32 },
dst,
..
} if dst.to_reg() == regs::rsp() => {
let imm = *simm32;
codes.push((offset, UnwindCode::StackAlloc { size: imm }));
}
Inst::MovRM {
src,
dst: SyntheticAmode::Real(Amode::ImmReg { simm32, base, .. }),
..
} if *base == regs::rsp() => {
// `mov reg, imm(rsp)`
let imm = *simm32;
codes.push((
offset,
UnwindCode::SaveRegister {
reg: *src,
stack_offset: imm,
},
));
}
Inst::AluRmiR {
is_64: true,
op: AluRmiROpcode::Add,
src: RegMemImm::Imm { simm32 },
dst,
..
} if dst.to_reg() == regs::rsp() => {
let imm = *simm32;
codes.push((offset, UnwindCode::StackDealloc { size: imm }));
}
_ => {}
}
}
let last_epilogue_end = context.len;
let epilogues_unwind_codes = context
.epilogues
.iter()
.map(|epilogue| {
// TODO add logic to process epilogue instruction instead of
// returning empty array.
let end = epilogue.end as usize - 1;
let end_offset = context.insts_layout[end];
if end_offset == last_epilogue_end {
// Do not remember/restore for very last epilogue.
return vec![];
}
let start = epilogue.start as usize;
let offset = context.insts_layout[start];
vec![
(offset, UnwindCode::RememberState),
// TODO epilogue instructions
(end_offset, UnwindCode::RestoreState),
]
})
.collect();
let prologue_size = context.insts_layout[context.prologue.end as usize];
Ok(Some(input::UnwindInfo {
prologue_size,
prologue_unwind_codes: codes,
epilogues_unwind_codes,
function_size: context.len,
word_size: WORD_SIZE,
initial_sp_offset: WORD_SIZE,
}))
}
}
#[cfg(feature = "unwind")]
pub(crate) mod winx64;

View file

@ -1,8 +1,6 @@
//! Unwind information for System V ABI (x86-64).
use crate::isa::unwind::input;
use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
use crate::result::CodegenResult;
use crate::isa::unwind::systemv::RegisterMappingError;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
use regalloc::{Reg, RegClass};
@ -82,21 +80,18 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
}
}
pub(crate) fn create_unwind_info(
unwind: input::UnwindInfo<Reg>,
) -> CodegenResult<Option<UnwindInfo>> {
struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
X86_64::RSP.0
}
}
let map = RegisterMapper;
pub(crate) struct RegisterMapper;
Ok(Some(UnwindInfo::build(unwind, &map)?))
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
X86_64::RSP.0
}
fn fp(&self) -> u16 {
X86_64::RBP.0
}
}
#[cfg(test)]
@ -114,6 +109,7 @@ mod tests {
use target_lexicon::triple;
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_simple_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
@ -136,7 +132,7 @@ mod tests {
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 13, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 17, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
@ -156,6 +152,7 @@ mod tests {
}
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_multi_return_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
@ -175,7 +172,7 @@ mod tests {
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }");
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
}
fn create_multi_return_function(call_conv: CallConv) -> Function {

View file

@ -0,0 +1,16 @@
//! Unwind information for Windows x64 ABI.
use regalloc::{Reg, RegClass};
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::winx64::RegisterMapper<Reg> for RegisterMapper {
fn map(reg: Reg) -> crate::isa::unwind::winx64::MappedRegister {
use crate::isa::unwind::winx64::MappedRegister;
match reg.get_class() {
RegClass::I64 => MappedRegister::Int(reg.get_hw_encoding()),
RegClass::V128 => MappedRegister::Xmm(reg.get_hw_encoding()),
_ => unreachable!(),
}
}
}

Diff not shown because of its large size.

View file

@ -9,10 +9,14 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::boxed::Box;
use regalloc::{PrettyPrint, RealRegUniverse};
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
mod abi;
mod inst;
mod lower;
@ -59,7 +63,8 @@ impl MachBackend for X64Backend {
let buffer = vcode.emit();
let buffer = buffer.finish();
let frame_size = vcode.frame_size();
let unwind_info = vcode.unwind_info()?;
let value_labels_ranges = vcode.value_labels_ranges();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
@ -71,7 +76,8 @@ impl MachBackend for X64Backend {
buffer,
frame_size,
disasm,
unwind_info,
value_labels_ranges,
stackslot_offsets,
})
}
@ -79,6 +85,15 @@ impl MachBackend for X64Backend {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.x64_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.x64_flags.hash(&mut hasher);
}
fn name(&self) -> &'static str {
"x64"
}
@ -92,15 +107,15 @@ impl MachBackend for X64Backend {
}
fn unsigned_add_overflow_condition(&self) -> IntCC {
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// overflow of an add.
IntCC::UnsignedGreaterThanOrEqual
// Unsigned `<`; this corresponds to the carry flag set on x86, which
// indicates an add has overflowed.
IntCC::UnsignedLessThan
}
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
// underflow of a subtract (carry is borrow for subtract).
IntCC::UnsignedGreaterThanOrEqual
// unsigned `<`; this corresponds to the carry flag set on x86, which
// indicates a sub has underflowed (carry is borrow for subtract).
IntCC::UnsignedLessThan
}
#[cfg(feature = "unwind")]
@ -111,14 +126,22 @@ impl MachBackend for X64Backend {
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
Ok(match (result.unwind_info.as_ref(), kind) {
(Some(info), UnwindInfoKind::SystemV) => {
inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
}
(Some(_info), UnwindInfoKind::Windows) => {
//TODO inst::unwind::winx64::create_unwind_info(info.clone())?.map(|u| UnwindInfo::WindowsX64(u))
None
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data.len(),
&mapper,
)?,
))
}
UnwindInfoKind::Windows => Some(UnwindInfo::WindowsX64(
crate::isa::unwind::winx64::create_unwind_info_from_insts::<
self::inst::unwind::winx64::RegisterMapper,
>(&result.buffer.unwind_info[..])?,
)),
_ => None,
})
}
@ -127,6 +150,11 @@ impl MachBackend for X64Backend {
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(inst::unwind::systemv::create_cie())
}
#[cfg(feature = "unwind")]
fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
}
}
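// Illustrative sketch (an editor's addition): why unsigned `<` captures the
// carry flag, as the overflow-condition comments above describe. After a
// wrapping add, carry-out is set exactly when the truncated sum is less than
// an operand; after a sub, borrow is set exactly when a < b.
#[cfg(test)]
mod carry_flag_sketch {
    fn add_carried(a: u64, b: u64) -> bool {
        a.wrapping_add(b) < a // comparing against `b` works equally well
    }

    fn sub_borrowed(a: u64, b: u64) -> bool {
        a < b
    }

    #[test]
    fn carry_and_borrow() {
        assert!(!add_carried(1, 2));
        assert!(add_carried(u64::MAX, 1)); // wraps to 0, which is < u64::MAX
        assert!(!sub_borrowed(2, 1));
        assert!(sub_borrowed(1, 2)); // 1 - 2 underflows
    }
}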
/// Create a new `isa::Builder`.

View file

@ -1,6 +1,6 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a

View file

@ -144,8 +144,13 @@ impl ArgAssigner for Args {
return ValueConversion::VectorSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
// Small integers are extended to the size of a pointer register, but
// only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI
// does, but our other supported ABIs on x86 do not.
if ty.is_int()
&& ty.bits() < u16::from(self.pointer_bits)
&& self.call_conv.extends_baldrdash()
{
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
@ -507,6 +512,7 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> Codege
}
CallConv::Probestack => unimplemented!("probestack calling convention"),
CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
CallConv::AppleAarch64 => unreachable!(),
}
}

Some files were not shown because too many files changed.