Mirror of https://github.com/mozilla/gecko-dev.git

Backed out 3 changesets (bug 1641504) for valgrind bustages. CLOSED TREE

Backed out changeset 95646dbd26a1 (bug 1641504)
Backed out changeset bf1919e75e65 (bug 1641504)
Backed out changeset dedeac296eaa (bug 1641504)

Parent: b3830ab8e3
Commit: d641e64a5e
@@ -60,7 +60,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"

[source."https://github.com/badboy/failure"]
git = "https://github.com/badboy/failure"
@@ -763,22 +763,22 @@ dependencies = [

[[package]]
name = "cranelift-bforest"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
]

[[package]]
name = "cranelift-codegen"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"byteorder",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
"log",
"regalloc",
"smallvec",
@@ -788,17 +788,17 @@ dependencies = [

[[package]]
name = "cranelift-codegen-meta"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
]

[[package]]
name = "cranelift-codegen-shared"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"

[[package]]
name = "cranelift-entity"
@@ -807,13 +807,13 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9

[[package]]
name = "cranelift-entity"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"

[[package]]
name = "cranelift-frontend"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen",
"log",
@@ -823,15 +823,15 @@ dependencies = [

[[package]]
name = "cranelift-wasm"
version = "0.64.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=e3d89c8a92a5fadedd75359b8485d23ac45ecf29#e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
version = "0.63.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=b7cfd39b531680217537cfcf5294a22077a0a58d#b7cfd39b531680217537cfcf5294a22077a0a58d"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.64.0",
"cranelift-entity 0.63.0",
"cranelift-frontend",
"log",
"thiserror",
"wasmparser 0.57.0",
"wasmparser 0.51.4",
]

[[package]]
@@ -3956,9 +3956,9 @@ dependencies = [

[[package]]
name = "regalloc"
version = "0.0.25"
version = "0.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cca5b48c9db66c5ba084e4660b4c0cfe8b551a96074bc04b7c11de86ad0bf1f9"
checksum = "b27b256b41986ac5141b37b8bbba85d314fbf546c182eb255af6720e07e4f804"
dependencies = [
"log",
"rustc-hash",
@@ -5375,9 +5375,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"

[[package]]
name = "wasmparser"
version = "0.57.0"
version = "0.51.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"
checksum = "aeb1956b19469d1c5e63e459d29e7b5aa0f558d9f16fcef09736f8a265e6c10a"

[[package]]
name = "wast"
@@ -76,8 +76,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f

[patch.crates-io.cranelift-codegen]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"

[patch.crates-io.cranelift-wasm]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "e3d89c8a92a5fadedd75359b8485d23ac45ecf29"
rev = "b7cfd39b531680217537cfcf5294a22077a0a58d"
@@ -387,16 +387,8 @@ bool env_uses_shared_memory(const CraneliftModuleEnvironment* wrapper) {
return wrapper->env->usesSharedMemory();
}

size_t env_num_types(const CraneliftModuleEnvironment* wrapper) {
return wrapper->env->types.length();
}
const FuncTypeWithId* env_type(const CraneliftModuleEnvironment* wrapper,
size_t typeIndex) {
return &wrapper->env->types[typeIndex].funcType();
}

const FuncTypeWithId* env_func_sig(const CraneliftModuleEnvironment* wrapper,
size_t funcIndex) {
const FuncTypeWithId* env_function_signature(
const CraneliftModuleEnvironment* wrapper, size_t funcIndex) {
return wrapper->env->funcTypes[funcIndex];
}
@@ -163,16 +163,15 @@ bool wasm::CraneliftDisabledByFeatures(JSContext* cx, bool* isDisabled,
// no threads, no simd, and on ARM64, no reference types.
bool debug = cx->realm() && cx->realm()->debuggerObservesAsmJS();
bool gc = cx->options().wasmGc();
bool multiValue = WasmMultiValueFlag(cx);
bool threads =
cx->realm() &&
cx->realm()->creationOptions().getSharedMemoryAndAtomicsEnabled();
#if defined(JS_CODEGEN_ARM64)
bool reftypesOnArm64 = cx->options().wasmReftypes();
bool multiValue = false;
#else
// On other platforms, assume reftypes has been implemented.
bool reftypesOnArm64 = false;
bool multiValue = WasmMultiValueFlag(cx);
#endif
bool simd = WasmSimdFlag(cx);
if (reason) {
@@ -224,8 +223,8 @@ bool wasm::GcTypesAvailable(JSContext* cx) {
}

bool wasm::MultiValuesAvailable(JSContext* cx) {
return WasmMultiValueFlag(cx) &&
(BaselineAvailable(cx) || IonAvailable(cx) || CraneliftAvailable(cx));
// Cranelift does not support multi-value.
return WasmMultiValueFlag(cx) && (BaselineAvailable(cx) || IonAvailable(cx));
}

bool wasm::SimdAvailable(JSContext* cx) {
@@ -13,8 +13,8 @@ name = "baldrdash"
# cranelift-wasm to pinned commits. If you want to update Cranelift in Gecko,
# you should update the following $TOP_LEVEL/Cargo.toml file: look for the
# revision (rev) hashes of both cranelift dependencies (codegen and wasm).
cranelift-codegen = { version = "0.64.0", default-features = false }
cranelift-wasm = "0.64.0"
cranelift-codegen = { version = "0.63.0", default-features = false }
cranelift-wasm = "0.63.0"
log = { version = "0.4.6", default-features = false, features = ["release_max_level_info"] }
env_logger = "0.6"
smallvec = "1.0"
@@ -212,10 +212,7 @@ extern "C" {
js::wasm::TypeCode env_unpack(BD_ValType type);

bool env_uses_shared_memory(const CraneliftModuleEnvironment* env);
size_t env_num_types(const CraneliftModuleEnvironment* env);
const js::wasm::FuncTypeWithId* env_type(const CraneliftModuleEnvironment* env,
size_t typeIndex);
const js::wasm::FuncTypeWithId* env_func_sig(
const js::wasm::FuncTypeWithId* env_function_signature(
const CraneliftModuleEnvironment* env, size_t funcIndex);
size_t env_func_import_tls_offset(const CraneliftModuleEnvironment* env,
size_t funcIndex);
@@ -24,7 +24,7 @@ use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
use cranelift_codegen::ir::{self, InstBuilder, SourceLoc};
use cranelift_codegen::isa;
use cranelift_wasm::{FuncIndex, GlobalIndex, SignatureIndex, TableIndex, WasmResult};
use cranelift_wasm::{FuncIndex, GlobalIndex, SignatureIndex, TableIndex, WasmError, WasmResult};

use smallvec::SmallVec;
@@ -171,6 +171,17 @@ impl FuncTypeWithId {
}
}

pub fn ret_type(self) -> WasmResult<Option<ir::Type>> {
match self.results() {
Ok(v) => match v.as_slice() {
[] => Ok(None),
[t] => Ok(Some(*t)),
_ => Err(WasmError::Unsupported("multiple values".to_string())),
},
Err(e) => Err(e),
}
}

pub fn id_kind(self) -> FuncTypeIdDescKind {
unsafe { low_level::funcType_idKind(self.0) }
}
@@ -198,14 +209,8 @@ impl<'a> ModuleEnvironment<'a> {
pub fn uses_shared_memory(&self) -> bool {
unsafe { low_level::env_uses_shared_memory(self.env) }
}
pub fn num_types(&self) -> usize {
unsafe { low_level::env_num_types(self.env) }
}
pub fn type_(&self, index: usize) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_type(self.env, index) })
}
pub fn func_sig(&self, func_index: FuncIndex) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_func_sig(self.env, func_index.index()) })
pub fn function_signature(&self, func_index: FuncIndex) -> FuncTypeWithId {
FuncTypeWithId(unsafe { low_level::env_function_signature(self.env, func_index.index()) })
}
pub fn func_import_tls_offset(&self, func_index: FuncIndex) -> usize {
unsafe { low_level::env_func_import_tls_offset(self.env, func_index.index()) }
@@ -28,7 +28,7 @@ use cranelift_codegen::binemit::{
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{
self, constant::ConstantOffset, stackslot::StackSize, ExternalName, JumpTable, SourceLoc,
TrapCode, Type,
TrapCode,
};
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::CodegenResult;
@@ -91,14 +91,12 @@ impl CompiledFunc {
pub struct BatchCompiler<'static_env, 'module_env> {
// Attributes that are constant accross multiple compilations.
static_environ: &'static_env bindings::StaticEnvironment,

environ: bindings::ModuleEnvironment<'module_env>,
module_state: ModuleTranslationState,

isa: Box<dyn TargetIsa>,

// Stateless attributes.
func_translator: FuncTranslator,
dummy_module_state: ModuleTranslationState,

// Mutable attributes.
/// Cranelift overall context.
@@ -126,7 +124,8 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
environ,
isa,
func_translator: FuncTranslator::new(),
module_state: create_module_translation_state(&environ)?,
// TODO for Cranelift to support multi-value, feed it the real type section here.
dummy_module_state: ModuleTranslationState::new(),
context: Context::new(),
trap_relocs: Traps::new(),
trans_env,
@@ -159,7 +158,7 @@ impl<'static_env, 'module_env> BatchCompiler<'static_env, 'module_env> {
self.context.func.name = wasm_function_name(index);

self.func_translator.translate(
&self.module_state,
&self.dummy_module_state,
func.bytecode(),
func.offset_in_module as usize,
&mut self.context.func,
@@ -301,27 +300,6 @@ impl<'static_env, 'module_env> fmt::Display for BatchCompiler<'static_env, 'modu
}
}

fn create_module_translation_state(
env: &bindings::ModuleEnvironment,
) -> WasmResult<ModuleTranslationState> {
let num_sig = env.num_types();

let mut arg_vecs = vec![];
let mut result_vecs = vec![];
for i in 0..num_sig {
let sig = env.type_(i);
arg_vecs.push(sig.args()?);
result_vecs.push(sig.results()?);
}
let types: Vec<(&[Type], &[Type])> = arg_vecs
.iter()
.zip(result_vecs.iter())
.map(|(args, results)| (&args[..], &results[..]))
.collect();

ModuleTranslationState::from_func_sigs(&types[..])
}

/// Create a Cranelift function name representing a WebAssembly function with `index`.
pub fn wasm_function_name(func: FuncIndex) -> ExternalName {
ExternalName::User {
@@ -538,7 +516,7 @@ impl TrapSink for Traps {
// entries, so we don't have to.
return;
}
HeapOutOfBounds | TableOutOfBounds => bindings::Trap::OutOfBounds,
HeapOutOfBounds | OutOfBounds | TableOutOfBounds => bindings::Trap::OutOfBounds,
IndirectCallToNull => bindings::Trap::IndirectCallToNull,
BadSignature => bindings::Trap::IndirectCallBadSig,
IntegerOverflow => bindings::Trap::IntegerOverflow,
@@ -69,7 +69,7 @@ fn init_sig_from_wsig(
sig.params.push(ir::AbiParam::new(arg));
}

for ret_type in wsig.results()? {
if let Some(ret_type) = wsig.ret_type()? {
let ret = match ret_type {
// Spidermonkey requires i32 returns to have their high 32 bits
// zero so that it can directly box them.
@@ -95,7 +95,7 @@ pub fn init_sig(
call_conv: CallConv,
func_index: FuncIndex,
) -> WasmResult<ir::Signature> {
let wsig = env.func_sig(func_index);
let wsig = env.function_signature(func_index);
init_sig_from_wsig(call_conv, wsig)
}
@@ -858,7 +858,7 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, callee, tlength);
pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);
pos.ins().trapnz(oob, ir::TrapCode::OutOfBounds);

// 3. Load the wtable base pointer from a global.
let tbase = pos.ins().global_value(POINTER_TYPE, base_gv);
@@ -1084,11 +1084,11 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_grow(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
delta: ir::Value,
init_value: ir::Value,
) -> WasmResult<ir::Value> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
Ok(self
.instance_call(&mut pos, &FN_TABLE_GROW, &[init_value, delta, table_index])
.unwrap())
@@ -1097,10 +1097,10 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
index: ir::Value,
) -> WasmResult<ir::Value> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
Ok(self
.instance_call(&mut pos, &FN_TABLE_GET, &[index, table_index])
.unwrap())
@@ -1109,11 +1109,11 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_set(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
value: ir::Value,
index: ir::Value,
) -> WasmResult<()> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
self.instance_call(&mut pos, &FN_TABLE_SET, &[index, value, table_index]);
Ok(())
}
@@ -1146,12 +1146,12 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_fill(
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
table_index: u32,
dst: ir::Value,
val: ir::Value,
len: ir::Value,
) -> WasmResult<()> {
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
let table_index = pos.ins().iconst(ir::types::I32, table_index as i64);
self.instance_call(&mut pos, &FN_TABLE_FILL, &[dst, val, len, table_index]);
Ok(())
}
@@ -1 +1 @@
{"files":{"Cargo.toml":"fe108380fdfaac0d92a92302d0751df182b888e874e56e465f4241dbb670a92e","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
{"files":{"Cargo.toml":"07d7670bb6f0c26fa3abb5d547d645b8b6ab32378dba33e3453122c8ba59c6b5","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"af367c67340fa7f6fb9a35b0aa637dcf303957f7ae7427a5f4f6356801c8bb04","src/lib.rs":"23a5c42d477197a947122e662068e681bb9ed31041c0b668c3267c3fce15d39e","src/map.rs":"a3b7f64cae7ec9c2a8038def315bcf90e8751552b1bc1c20b62fbb8c763866c4","src/node.rs":"28f7edd979f7b9712bc4ab30b0d2a1b8ad5485a4b1e8c09f3dcaf501b9b5ccd1","src/path.rs":"a86ee1c882c173e8af96fd53a416a0fb485dd3f045ac590ef313a9d9ecf90f56","src/pool.rs":"f6337b5417f7772e6878a160c1a40629199ff09997bdff18eb2a0ba770158600","src/set.rs":"281eb8b5ead1ffd395946464d881f9bb0e7fb61092aed701d72d2314b5f80994"},"package":null}
@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.64.0"
version = "0.63.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"

[dependencies]
cranelift-entity = { path = "../entity", version = "0.64.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.63.0", default-features = false }

[badges]
maintenance = { status = "experimental" }
@@ -1 +1 @@
{"files":{"Cargo.toml":"a19ba59829e25d67120787a454038986a6759f7d592dcf427924ebbcb5de6697","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"87c70a4856bb1c40ba6babed549aa7e01478375244dea605be0334ae6d0441e0","src/isa/x86/instructions.rs":"a2c81ff80e30980fe444aa1e56ba57c54911cee67c392c16bfbdf28f75151dc6","src/isa/x86/legalize.rs":"b5f68ea089c4237c7140ef0b8ff71f7c6a5f53884bf2158d81b52d3750bcacac","src/isa/x86/mod.rs":"ecc1d4de51bd44dbaa864fafebb68f66bc99fb8c9ad67a0fcb420bd1f87d1524","src/isa/x86/opcodes.rs":"f98dd104910efbfa3c211080c68a17da607ce585b9d81bf22cb255e58e51f99f","src/isa/x86/recipes.rs":"b71a3746ed39b08932dc1a0ce885b61eec2e8daf2e92d12eccc0d085e4587a1f","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"69623c2193458c838617e52e88d3ff91b71f3f07aec1f1494c0cabd7c332ad49","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"38b9a3b09bd86d020b841abe94eef003063b2cb12d9dc991a7743b2cc0bb3362","src/shared/legalize.rs":"55b186e09383cc16491a6a0dd79aa9149c1aba1927a7173701478818b8116795","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"0b4f903de5f2df19304c44bf4bd456c3a8e165103b38ccb13b6f88ae8a3c7ee8","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"2d1fae4231bb7d3c43ebcaccbc62d243440ab537a5b6bd40c653ece0bcda5a75","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"b275053977c0239211c1df35253154ba4dce2519f506088e71104de37d3db862","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"fd5a7418fa0d47cdf1b823b31553f1549c03e160ffffac9e22d611185774367e","src/isa/x86/encodings.rs":"a19e5dd7ba7fe74f2ec0a2367e61e2dab498113f8b2a2f1bc677b6ee486358d5","src/isa/x86/instructions.rs":"144e83591444115f2ab8d16777e322eb5c9d8eef123ad05d0c66811a029b662b","src/isa/x86/legalize.rs":"d2eb6cee5c885870250417f4d9086527c96f994542c9316baf14776b500e45b0","src/isa/x86/mod.rs":"65953f998ff3fc3b333167e9979fc0f15f976b51ad75272ac19dcaad0981b371","src/isa/x86/opcodes.rs":"44556abfc4a319a6e48aa878f10550b7878725ba0bf75ddc9bb6a0e6f4223c73","src/isa/x86/recipes.rs":"f142ae4ea1db29df0f3c9aedf0c5ee228682136526499f0c85aab101375d0c8c","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"49abb46533b3a5415cd033e0a98b5c9561e231f2dd9510d587dc69b204bb6706","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"89ed4074f748637adf56b93ba952e398c45d43e6326d01676885939e3fe8bc4a","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"8df3abeb47b52b7dc99f6e0bb16cf8a695ce4fe0a8d86035945a2612d1aa5a6d","src/shared/legalize.rs":"bc9c3292446c1d338df1c4ce19f3ac5482cfe582a04a5a1e82fc9aaa6aef25ea","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"9460758f04ccfc9129ea4d4081571fe4a3ac574c3d25b6473f888fbbb506b9d3","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
@@ -1,19 +1,19 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.64.0"
version = "0.63.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"

[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]

[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.64.0" }
cranelift-entity = { path = "../../entity", version = "0.64.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.63.0" }
cranelift-entity = { path = "../../entity", version = "0.63.0" }

[badges]
maintenance = { status = "experimental" }

[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
@@ -874,32 +874,17 @@ fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) {
args.join(", ")
);

let imms_need_sign_extension = format
.imm_fields
.iter()
.any(|f| f.kind.rust_type == "ir::immediates::Imm64");

fmt.doc_comment(format.to_string());
fmt.line("#[allow(non_snake_case)]");
fmtln!(fmt, "fn {} {{", proto);
fmt.indent(|fmt| {
// Generate the instruction data.
fmtln!(
fmt,
"let{} data = ir::InstructionData::{} {{",
if imms_need_sign_extension { " mut" } else { "" },
format.name
);
fmtln!(fmt, "let data = ir::InstructionData::{} {{", format.name);
fmt.indent(|fmt| {
fmt.line("opcode,");
gen_member_inits(format, fmt);
});
fmtln!(fmt, "};");

if imms_need_sign_extension {
fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);");
}

fmt.line("self.build(data, ctrl_typevar)");
});
fmtln!(fmt, "}");
@@ -64,7 +64,7 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG

// R-type with an immediate shift amount instead of rs2.
recipes.push(
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
@@ -79,11 +79,11 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
);

recipes.push(
EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4)
.operands_in(vec![gpr])
.operands_out(vec![gpr])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm64,
&*formats.binary_imm,
"imm",
12,
0,
@@ -689,12 +689,6 @@ fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r:
}
}
}
for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
e.enc_both(
bint.bind(*to).bind(*from),
rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
);
}

// Copy Special
// For x86-64, only define REX forms for now, since we can't describe the
@@ -1454,7 +1448,6 @@ fn define_alu(
// x86 has a bitwise not instruction NOT.
e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));

// Also add a `b1` encodings for the logic instructions.
// TODO: Should this be done with 8-bit instructions? It would improve partial register
@@ -1494,13 +1487,8 @@ fn define_alu(
for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
// Cannot use enc_i32_i64 for this pattern because instructions require
// to bind any.
e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
e.enc32(
inst.bind(I32).bind(I16),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc32(
inst.bind(I32).bind(I32),
inst.bind(I32).bind(Any),
rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
);
e.enc64(
@@ -1613,11 +1601,8 @@ fn define_simd(
let sadd_sat = shared.by_name("sadd_sat");
let scalar_to_vector = shared.by_name("scalar_to_vector");
let sload8x8 = shared.by_name("sload8x8");
let sload8x8_complex = shared.by_name("sload8x8_complex");
let sload16x4 = shared.by_name("sload16x4");
let sload16x4_complex = shared.by_name("sload16x4_complex");
let sload32x2 = shared.by_name("sload32x2");
let sload32x2_complex = shared.by_name("sload32x2_complex");
let spill = shared.by_name("spill");
let sqrt = shared.by_name("sqrt");
let sshr_imm = shared.by_name("sshr_imm");
@@ -1626,15 +1611,11 @@ fn define_simd(
let store_complex = shared.by_name("store_complex");
let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
let uload16x4 = shared.by_name("uload16x4");
let uload16x4_complex = shared.by_name("uload16x4_complex");
let uload32x2 = shared.by_name("uload32x2");
let uload32x2_complex = shared.by_name("uload32x2_complex");
let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let x86_insertps = x86.by_name("x86_insertps");
let x86_movlhps = x86.by_name("x86_movlhps");
let x86_movsd = x86.by_name("x86_movsd");
@@ -1645,8 +1626,6 @@ fn define_simd(
let x86_pmaxu = x86.by_name("x86_pmaxu");
let x86_pmins = x86.by_name("x86_pmins");
let x86_pminu = x86.by_name("x86_pminu");
let x86_pmullq = x86.by_name("x86_pmullq");
let x86_pmuludq = x86.by_name("x86_pmuludq");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_psll = x86.by_name("x86_psll");
@@ -1657,7 +1636,6 @@ fn define_simd(
let x86_punpckl = x86.by_name("x86_punpckl");

// Shorthands for recipes.
let rec_blend = r.template("blend");
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
let rec_f_ib = r.template("f_ib");
let rec_fa = r.template("fa");
@@ -1727,20 +1705,6 @@ fn define_simd(
e.enc_both_inferred(instruction, template);
}

// SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
// either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB;
// for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let opcode = match ty.lane_bits() {
32 => &BLENDVPS,
64 => &BLENDVPD,
_ => &PBLENDVB,
};
let instruction = vselect.bind(vector(ty, sse_vector_size));
let template = rec_blend.opcodes(opcode);
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
}

// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
@@ -2013,35 +1977,6 @@ fn define_simd(
}
}

// SIMD load extend (complex addressing)
let is_load_complex_length_two =
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
for (inst, opcodes) in &[
(uload8x8_complex, &PMOVZXBW),
(uload16x4_complex, &PMOVZXWD),
(uload32x2_complex, &PMOVZXDQ),
(sload8x8_complex, &PMOVSXBW),
(sload16x4_complex, &PMOVSXWD),
(sload32x2_complex, &PMOVSXDQ),
] {
for recipe in &[
rec_fldWithIndex,
rec_fldWithIndexDisp8,
rec_fldWithIndexDisp32,
] {
let template = recipe.opcodes(*opcodes);
let predicate = |encoding: EncodingBuilder| {
encoding
.isa_predicate(use_sse41_simd)
.inst_predicate(is_load_complex_length_two.clone())
};
e.enc32_func(inst.clone(), template.clone(), predicate);
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
e.enc64_func(inst.clone(), template.rex(), predicate);
e.enc64_func(inst.clone(), template, predicate);
}
}

// SIMD integer addition
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
let iadd = iadd.bind(vector(*ty, sse_vector_size));
@@ -2101,14 +2036,12 @@ fn define_simd(
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
}

// SIMD multiplication with lane expansion.
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));

// SIMD integer multiplication for I64x2 using a AVX512.
{
let imul = imul.bind(vector(I64, sse_vector_size));
e.enc_32_64_maybe_isap(
x86_pmullq,
rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
imul,
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
);
}
@@ -2184,11 +2117,8 @@ fn define_simd(
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));

// One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set.
if *ty != I64 {
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}

// SIMD integer comparisons
@@ -2293,7 +2223,8 @@ fn define_entity_ref(
let rec_gvaddr8 = r.template("gvaddr8");
let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
let rec_spaddr_id = r.template("spaddr_id");
let rec_spaddr4_id = r.template("spaddr4_id");
let rec_spaddr8_id = r.template("spaddr8_id");

// Predicates shorthands.
let all_ones_funcaddrs_and_not_is_pic =
@@ -2381,8 +2312,8 @@ fn define_entity_ref(
//
// TODO: Add encoding rules for stack_load and stack_store, so that they
// don't get legalized to stack_addr + load/store.
e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());

// Constant addresses (PIC).
e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
@@ -283,7 +283,7 @@ pub(crate) fn define(
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
register and re-orders the data according to the passed immediate byte.
"#,
&formats.binary_imm8,
&formats.extract_lane,
)
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
.operands_out(vec![a]),
@@ -314,7 +314,7 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.binary_imm8,
&formats.extract_lane,
)
.operands_in(vec![x, Idx])
.operands_out(vec![a]),
@@ -342,9 +342,9 @@ pub(crate) fn define(
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.ternary_imm8,
&formats.insert_lane,
)
.operands_in(vec![x, y, Idx])
.operands_in(vec![x, Idx, y])
.operands_out(vec![a]),
);
@@ -369,9 +369,9 @@ pub(crate) fn define(
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
floats, which are already stored in an XMM register.
"#,
&formats.ternary_imm8,
&formats.insert_lane,
)
.operands_in(vec![x, y, Idx])
.operands_in(vec![x, Idx, y])
.operands_out(vec![a]),
);
@@ -475,11 +475,10 @@ pub(crate) fn define(
.includes_scalars(false)
.build(),
);
let I128 = &TypeVar::new(
"I128",
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
upper lane is concatenated with the lower lane to form the integer)",
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
the lower lane to form the integer)",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
@@ -488,7 +487,7 @@ pub(crate) fn define(
);

let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
let a = &Operand::new("a", IxN);

ig.push(
@@ -533,47 +532,6 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing two 64-bit integers",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I64x2);
let y = &Operand::new("y", I64x2);
let a = &Operand::new("a", I64x2);
ig.push(
Inst::new(
"x86_pmullq",
r#"
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
encodings for CPUs with newer vector features.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"x86_pmuludq",
r#"
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
overflow as in `x86_pmullq`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let f = &Operand::new("f", iflags);
@@ -8,7 +8,7 @@ use crate::shared::Definitions as SharedDefinitions;

#[allow(clippy::many_single_char_names)]
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let mut expand = TransformGroupBuilder::new(
let mut group = TransformGroupBuilder::new(
"x86_expand",
r#"
Legalize instructions by expansion.
@@ -18,37 +18,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
.isa("x86")
.chain_with(shared.transform_groups.by_name("expand_flags").id);

let mut narrow = TransformGroupBuilder::new(
"x86_narrow",
r#"
Legalize instructions by narrowing.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("narrow_flags").id);

let mut narrow_avx = TransformGroupBuilder::new(
"x86_narrow_avx",
r#"
Legalize instructions by narrowing with CPU feature checks.

This special case converts using x86 AVX instructions where available."#,
)
.isa("x86");
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
// function for where this is chained.

let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);

// List of instructions.
let insts = &shared.instructions;
let band = insts.by_name("band");
@@ -68,8 +37,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let isub = insts.by_name("isub");
let ishl = insts.by_name("ishl");
let ireduce = insts.by_name("ireduce");
let popcnt = insts.by_name("popcnt");
let sdiv = insts.by_name("sdiv");
let selectif = insts.by_name("selectif");
@@ -78,7 +45,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let tls_value = insts.by_name("tls_value");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr = insts.by_name("ushr");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
@@ -89,40 +55,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

let imm = &shared.imm;

// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
// not encodable.
let a = var("a");
let x = var("x");
let y = var("y");
let z = var("z");

for &ty in &[I8, I16, I32] {
let ishl_by_i64 = ishl.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ishl_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}

for &ty in &[I8, I16, I32] {
let ushr_by_i64 = ushr.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ushr_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}

// Division and remainder.
//
// The srem expansion requires custom code because srem INT_MIN, -1 is not
// allowed to trap. The other ops need to check avoid_div_traps.
expand.custom_legalize(sdiv, "expand_sdivrem");
expand.custom_legalize(srem, "expand_sdivrem");
expand.custom_legalize(udiv, "expand_udivrem");
expand.custom_legalize(urem, "expand_udivrem");
group.custom_legalize(sdiv, "expand_sdivrem");
group.custom_legalize(srem, "expand_sdivrem");
group.custom_legalize(udiv, "expand_udivrem");
group.custom_legalize(urem, "expand_udivrem");

// Double length (widening) multiplication.
let a = var("a");
@@ -133,12 +73,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let res_lo = var("res_lo");
let res_hi = var("res_hi");

expand.legalize(
group.legalize(
def!(res_hi = umulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
);

expand.legalize(
group.legalize(
def!(res_hi = smulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
);
@@ -157,7 +97,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");

// Equality needs an explicit `ord` test which checks the parity bit.
expand.legalize(
group.legalize(
def!(a = fcmp(floatcc_eq, x, y)),
vec![
def!(a1 = fcmp(floatcc_ord, x, y)),
@@ -165,7 +105,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
def!(a = band(a1, a2)),
],
);
expand.legalize(
group.legalize(
def!(a = fcmp(floatcc_ne, x, y)),
vec![
def!(a1 = fcmp(floatcc_uno, x, y)),
@@ -190,20 +130,20 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
(floatcc_ugt, floatcc_ult),
(floatcc_uge, floatcc_ule),
] {
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
}

// We need to modify the CFG for min/max legalization.
expand.custom_legalize(fmin, "expand_minmax");
expand.custom_legalize(fmax, "expand_minmax");
group.custom_legalize(fmin, "expand_minmax");
group.custom_legalize(fmax, "expand_minmax");

// Conversions from unsigned need special handling.
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
// Conversions from float to int can trap and modify the control flow graph.
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");

// Count leading and trailing zeroes, for baseline x86_64
let c_minus_one = var("c_minus_one");
@@ -218,7 +158,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
let imm64_63 = Literal::constant(&imm.imm64, 63);
expand.legalize(
group.legalize(
def!(a = clz.I64(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@@ -230,7 +170,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_31 = Literal::constant(&imm.imm64, 31);
expand.legalize(
group.legalize(
def!(a = clz.I32(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
@@ -242,7 +182,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_64 = Literal::constant(&imm.imm64, 64);
expand.legalize(
group.legalize(
def!(a = ctz.I64(x)),
vec![
def!(c_sixty_four = iconst(imm64_64)),
@@ -252,7 +192,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);

let imm64_32 = Literal::constant(&imm.imm64, 32);
expand.legalize(
group.legalize(
def!(a = ctz.I32(x)),
vec![
def!(c_thirty_two = iconst(imm64_32)),
@@ -285,7 +225,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

let imm64_1 = Literal::constant(&imm.imm64, 1);
let imm64_4 = Literal::constant(&imm.imm64, 4);
expand.legalize(
group.legalize(
def!(r = popcnt.I64(x)),
vec![
def!(qv3 = ushr_imm(x, imm64_1)),
@@ -326,7 +266,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let lc0F = var("lc0F");
let lc01 = var("lc01");

expand.legalize(
group.legalize(
def!(r = popcnt.I32(x)),
vec![
def!(lv3 = ushr_imm(x, imm64_1)),
@@ -349,27 +289,31 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
],
);

expand.custom_legalize(ineg, "convert_ineg");
expand.custom_legalize(tls_value, "expand_tls_value");
group.custom_legalize(ineg, "convert_ineg");

group.custom_legalize(tls_value, "expand_tls_value");

group.build_and_add_to(&mut shared.transform_groups);

let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.

Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);

widen.custom_legalize(ineg, "convert_ineg");

// To reduce compilation times, separate out large blocks of legalizations by theme.
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);

expand.build_and_add_to(&mut shared.transform_groups);
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
narrow_avx
.chain_with(narrow_id)
.build_and_add_to(&mut shared.transform_groups);
widen.build_and_add_to(&mut shared.transform_groups);

// To reduce compilation times, separate out large blocks of legalizations by
// theme.
define_simd(shared, x86_instructions);
}

fn define_simd(
shared: &mut SharedDefinitions,
x86_instructions: &InstructionGroup,
narrow: &mut TransformGroupBuilder,
narrow_avx: &mut TransformGroupBuilder,
) {
fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let insts = &shared.instructions;
let band = insts.by_name("band");
let band_not = insts.by_name("band_not");
@@ -386,7 +330,6 @@ fn define_simd(
let icmp = insts.by_name("icmp");
let imax = insts.by_name("imax");
let imin = insts.by_name("imin");
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
@ -406,7 +349,6 @@ fn define_simd(
|
|||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_packss = x86_instructions.by_name("x86_packss");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
|
@ -422,6 +364,16 @@ fn define_simd(
|
|||
|
||||
let imm = &shared.imm;
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
// Set up variables and immediates.
|
||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||
|
@ -478,7 +430,7 @@ fn define_simd(
|
|||
// Move into the lowest 16 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Insert the value again but in the next lowest 16 bits.
|
||||
def!(b = insertlane(a, x, uimm8_one)),
|
||||
def!(b = insertlane(a, uimm8_one, x)),
|
||||
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
|
||||
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
|
@ -512,7 +464,7 @@ fn define_simd(
|
|||
// Move into the lowest 64 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Move into the highest 64 bits of the same XMM register.
|
||||
def!(y = insertlane(a, x, uimm8_one)),
|
||||
def!(y = insertlane(a, uimm8_one, x)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
@ -541,8 +493,8 @@ fn define_simd(
|
|||
);
|
||||
}
|
||||
|
||||
// SIMD shift right (arithmetic, i16x8 and i32x4)
|
||||
for ty in &[I16, I32] {
|
||||
// SIMD shift right (arithmetic)
|
||||
for ty in &[I16, I32, I64] {
|
||||
let sshr = sshr.bind(vector(*ty, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
narrow.legalize(
|
||||
|
@ -550,7 +502,6 @@ fn define_simd(
|
|||
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i8x16)
|
||||
{
|
||||
let sshr = sshr.bind(vector(I8, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
|
@ -575,25 +526,6 @@ fn define_simd(
|
|||
],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i64x2)
|
||||
{
|
||||
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
|
||||
let sshr_scalar_lane0 = sshr.bind(I64);
|
||||
let sshr_scalar_lane1 = sshr.bind(I64);
|
||||
narrow.legalize(
|
||||
def!(z = sshr_vector(x, y)),
|
||||
vec![
|
||||
// Use scalar operations to shift the first lane.
|
||||
def!(a = extractlane(x, uimm8_zero)),
|
||||
def!(b = sshr_scalar_lane0(a, y)),
|
||||
def!(c = insertlane(x, b, uimm8_zero)),
|
||||
// Do the same for the second lane.
|
||||
def!(d = extractlane(x, uimm8_one)),
|
||||
def!(e = sshr_scalar_lane1(d, y)),
|
||||
def!(z = insertlane(c, e, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD select
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
|
@ -608,17 +540,6 @@ fn define_simd(
|
|||
);
|
||||
}
|
||||
|
||||
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
|
||||
// This works, because each lane of boolean vector is filled with zeroes or ones.
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vselect = vselect.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(d = vselect(c, x, y)),
|
||||
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vany_true
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
|
@ -788,6 +709,5 @@ fn define_simd(
|
|||
narrow.custom_legalize(ushr, "convert_ushr");
|
||||
narrow.custom_legalize(ishl, "convert_ishl");
|
||||
|
||||
// This lives in the expand group to avoid conflicting with, e.g., i128 legalizations.
|
||||
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
||||
narrow.build_and_add_to(&mut shared.transform_groups);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
use crate::cdsl::cpu_modes::CpuMode;
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::types::{ReferenceType, VectorType};
use crate::cdsl::types::ReferenceType;

use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
@ -35,7 +35,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");

x86_32.legalize_monomorphic(expand_flags);
@ -47,7 +46,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);

x86_64.legalize_monomorphic(expand_flags);
x86_64.legalize_default(x86_narrow);
@ -59,7 +57,6 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);

let recipes = recipes::define(shared_defs, &settings, &regs);
@ -54,14 +54,6 @@ pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
/// Bit scan reverse (stores index of first encountered 1 from the back).
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];

/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];

/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];

/// Call near, relative, displacement relative to next instruction (sign-extended).
pub static CALL_RELATIVE: [u8; 1] = [0xe8];

@ -343,10 +335,6 @@ pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];

/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
/// in XMM0 and store the values into xmm1 (SSE4.1).
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];

/// Compare packed data for equal (SSE2).
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];

@ -471,11 +459,7 @@ pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];

/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];
@ -427,7 +427,6 @@ pub(crate) fn define<'shared>(
|
|||
let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
|
||||
let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
|
||||
let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
|
||||
let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
|
||||
|
||||
// Stack operand with a 32-bit signed displacement from either RBP or RSP.
|
||||
let stack_gpr32 = Stack::new(gpr);
|
||||
|
@ -608,12 +607,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r with FPR ins and outs. A form with a byte immediate.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
|
||||
.operands_in(vec![fpr, fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -621,7 +620,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -905,32 +904,14 @@ pub(crate) fn define<'shared>(
|
|||
.inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
|
||||
);
|
||||
|
||||
// XX /r for BLEND* instructions
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
|
||||
.operands_in(vec![
|
||||
OperandConstraint::FixedReg(reg_xmm0),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
OperandConstraint::RegClass(fpr),
|
||||
])
|
||||
.operands_out(vec![2])
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
|
||||
modrm_rr(in_reg1, in_reg2, sink);
|
||||
"#,
|
||||
),
|
||||
"size_with_inferred_rex_for_inreg1_inreg2",
|
||||
);
|
||||
|
||||
// XX /n ib with 8-bit immediate sign-extended.
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -947,11 +928,11 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
8,
|
||||
0,
|
||||
|
@ -970,11 +951,11 @@ pub(crate) fn define<'shared>(
|
|||
// XX /n id with 32-bit immediate sign-extended.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
|
||||
EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_signed_int(
|
||||
&*formats.binary_imm64,
|
||||
&*formats.binary_imm,
|
||||
"imm",
|
||||
32,
|
||||
0,
|
||||
|
@ -996,20 +977,20 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![fpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.binary_imm8,
|
||||
"imm",
|
||||
&*formats.extract_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
)) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate?
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1020,17 +1001,17 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
|
||||
.operands_in(vec![fpr])
|
||||
.operands_out(vec![gpr])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.binary_imm8, "imm", 8, 0,
|
||||
&*formats.extract_lane, "lane", 8, 0,
|
||||
))
|
||||
.emit(
|
||||
r#"
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
|
||||
modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
), "size_with_inferred_rex_for_inreg0_outreg0"
|
||||
|
@ -1040,12 +1021,12 @@ pub(crate) fn define<'shared>(
|
|||
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
|
||||
{
|
||||
recipes.add_template_inferred(
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
|
||||
EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
|
||||
.operands_in(vec![fpr, gpr])
|
||||
.operands_out(vec![0])
|
||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||
&*formats.ternary_imm8,
|
||||
"imm",
|
||||
&*formats.insert_lane,
|
||||
"lane",
|
||||
8,
|
||||
0,
|
||||
))
|
||||
|
@ -1053,7 +1034,7 @@ pub(crate) fn define<'shared>(
|
|||
r#"
|
||||
{{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
let imm: i64 = imm.into();
|
||||
let imm:i64 = lane.into();
|
||||
sink.put1(imm as u8);
|
||||
"#,
|
||||
),
|
||||
|
@ -1451,7 +1432,23 @@ pub(crate) fn define<'shared>(
|
|||
// TODO Alternative forms for 8-bit immediates, when applicable.
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
|
||||
EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
let sp = StackRef::sp(stack_slot, &func.stack_slots);
|
||||
let base = stk_base(sp.base);
|
||||
{{PUT_OP}}(bits, rex2(out_reg0, base), sink);
|
||||
modrm_sib_disp8(out_reg0, sink);
|
||||
sib_noindex(base, sink);
|
||||
let imm : i32 = offset.into();
|
||||
sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
|
||||
"#,
|
||||
),
|
||||
);
|
||||
|
||||
recipes.add_template_recipe(
|
||||
EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
|
||||
.operands_out(vec![gpr])
|
||||
.emit(
|
||||
r#"
|
||||
|
@ -2874,12 +2871,12 @@ pub(crate) fn define<'shared>(
|
|||
|
||||
{
|
||||
let has_small_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0);
|
||||
|
||||
// XX /n, MI form with imm8.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
|
||||
EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_small_offset)
|
||||
|
@ -2897,12 +2894,12 @@ pub(crate) fn define<'shared>(
|
|||
);
|
||||
|
||||
let has_big_offset =
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
|
||||
InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0);
|
||||
|
||||
// XX /n, MI form with imm32.
|
||||
recipes.add_template(
|
||||
Template::new(
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
|
||||
EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5)
|
||||
.operands_in(vec![gpr])
|
||||
.operands_out(vec![reg_rflags])
|
||||
.inst_predicate(has_big_offset)
|
||||
|
|
|
@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");

settings.add_bool(
"use_new_backend",
"Whether to use the new codegen backend using the new isel",
false,
);

// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
@ -4,7 +4,7 @@ use std::rc::Rc;

pub(crate) struct Formats {
pub(crate) binary: Rc<InstructionFormat>,
pub(crate) binary_imm64: Rc<InstructionFormat>,
pub(crate) binary_imm: Rc<InstructionFormat>,
pub(crate) branch: Rc<InstructionFormat>,
pub(crate) branch_float: Rc<InstructionFormat>,
pub(crate) branch_icmp: Rc<InstructionFormat>,
@ -17,13 +17,14 @@ pub(crate) struct Formats {
pub(crate) cond_trap: Rc<InstructionFormat>,
pub(crate) copy_special: Rc<InstructionFormat>,
pub(crate) copy_to_ssa: Rc<InstructionFormat>,
pub(crate) binary_imm8: Rc<InstructionFormat>,
pub(crate) extract_lane: Rc<InstructionFormat>,
pub(crate) float_compare: Rc<InstructionFormat>,
pub(crate) float_cond: Rc<InstructionFormat>,
pub(crate) float_cond_trap: Rc<InstructionFormat>,
pub(crate) func_addr: Rc<InstructionFormat>,
pub(crate) heap_addr: Rc<InstructionFormat>,
pub(crate) indirect_jump: Rc<InstructionFormat>,
pub(crate) insert_lane: Rc<InstructionFormat>,
pub(crate) int_compare: Rc<InstructionFormat>,
pub(crate) int_compare_imm: Rc<InstructionFormat>,
pub(crate) int_cond: Rc<InstructionFormat>,
@ -44,7 +45,6 @@ pub(crate) struct Formats {
pub(crate) store_complex: Rc<InstructionFormat>,
pub(crate) table_addr: Rc<InstructionFormat>,
pub(crate) ternary: Rc<InstructionFormat>,
pub(crate) ternary_imm8: Rc<InstructionFormat>,
pub(crate) trap: Rc<InstructionFormat>,
pub(crate) unary: Rc<InstructionFormat>,
pub(crate) unary_bool: Rc<InstructionFormat>,
@ -76,9 +76,7 @@ impl Formats {

binary: Builder::new("Binary").value().value().build(),

binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),

binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(),

// The select instructions are controlled by the second VALUE operand.
// The first VALUE operand is the controlling flag which has a derived type.
@ -90,18 +88,23 @@ impl Formats {
.typevar_operand(1)
.build(),

ternary_imm8: Builder::new("TernaryImm8")
.value()
.imm(&imm.uimm8)
.value()
.build(),

// Catch-all for instructions with many outputs and inputs and no immediate
// operands.
multiary: Builder::new("MultiAry").varargs().build(),

nullary: Builder::new("NullAry").build(),

insert_lane: Builder::new("InsertLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.value()
.build(),

extract_lane: Builder::new("ExtractLane")
.value()
.imm_with_name("lane", &imm.uimm8)
.build(),

shuffle: Builder::new("Shuffle")
.value()
.value()
@ -559,9 +559,9 @@ fn define_simd_lane_access(
|
|||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
&formats.insert_lane,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_in(vec![x, Idx, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
|
@ -579,7 +579,7 @@ fn define_simd_lane_access(
|
|||
may or may not be zeroed depending on the ISA but the type system should prevent using
|
||||
``a`` as anything other than the extracted value.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
&formats.extract_lane,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -1172,20 +1172,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8",
|
||||
|
@ -1200,20 +1186,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload8x8_complex",
|
||||
r#"
|
||||
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i16x8 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I32x4 = &TypeVar::new(
|
||||
"I32x4",
|
||||
"A SIMD vector with exactly 4 lanes of 32-bit values",
|
||||
|
@ -1229,7 +1201,7 @@ pub(crate) fn define(
|
|||
Inst::new(
|
||||
"uload16x4",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||
vector.
|
||||
"#,
|
||||
&formats.load,
|
||||
|
@ -1239,20 +1211,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4",
|
||||
|
@ -1267,20 +1225,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload16x4_complex",
|
||||
r#"
|
||||
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i32x4 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector with exactly 2 lanes of 64-bit values",
|
||||
|
@ -1306,20 +1250,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2",
|
||||
|
@ -1334,20 +1264,6 @@ pub(crate) fn define(
|
|||
.can_load(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"sload32x2_complex",
|
||||
r#"
|
||||
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||
i64x2 vector.
|
||||
"#,
|
||||
&formats.load_complex,
|
||||
)
|
||||
.operands_in(vec![MemFlags, args, Offset])
|
||||
.operands_out(vec![a])
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
|
||||
let a = &Operand::new("a", Mem).with_doc("Value loaded");
|
||||
let Offset =
|
||||
|
@ -2215,7 +2131,7 @@ pub(crate) fn define(
|
|||
Like `icmp_imm`, but returns integer CPU flags instead of testing
|
||||
a specific condition code.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![f]),
|
||||
|
@ -2265,7 +2181,7 @@ pub(crate) fn define(
|
|||
This is similar to `iadd` but the operands are interpreted as signed integers and their
|
||||
summed result, instead of wrapping, will be saturated to the lowest or highest
|
||||
signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
|
||||
since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
since an `iadd_ssat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
|
||||
clamped to `0x7F`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
|
@ -2460,7 +2376,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2475,7 +2391,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2489,7 +2405,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2505,7 +2421,7 @@ pub(crate) fn define(
|
|||
representable in `B` bits two's complement. This only happens
|
||||
when `x = -2^{B-1}, Y = -1`.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2519,7 +2435,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2533,7 +2449,7 @@ pub(crate) fn define(
|
|||
|
||||
This operation traps if the divisor is zero.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2552,7 +2468,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2952,7 +2868,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2969,7 +2885,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -2986,7 +2902,7 @@ pub(crate) fn define(
|
|||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3031,7 +2947,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate left by immediate.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3043,7 +2959,7 @@ pub(crate) fn define(
|
|||
r#"
|
||||
Rotate right by immediate.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3118,7 +3034,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of ``x``.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3132,7 +3048,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
@ -3146,7 +3062,7 @@ pub(crate) fn define(
|
|||
|
||||
The shift amount is masked to the size of the register.
|
||||
"#,
|
||||
&formats.binary_imm64,
|
||||
&formats.binary_imm,
|
||||
)
|
||||
.operands_in(vec![x, Y])
|
||||
.operands_out(vec![a]),
|
||||
|
|
|
@ -61,7 +61,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let cls = insts.by_name("cls");
|
||||
let clz = insts.by_name("clz");
|
||||
let ctz = insts.by_name("ctz");
|
||||
let copy = insts.by_name("copy");
|
||||
let fabs = insts.by_name("fabs");
|
||||
let f32const = insts.by_name("f32const");
|
||||
let f64const = insts.by_name("f64const");
|
||||
|
@ -199,6 +198,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
let ah = var("ah");
|
||||
let cc = var("cc");
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
let ptr = var("ptr");
|
||||
let flags = var("flags");
|
||||
let offset = var("off");
|
||||
|
@ -211,8 +212,8 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
// embedded as part of arguments), so use a custom legalization for now.
|
||||
narrow.custom_legalize(iconst, "narrow_iconst");
|
||||
|
||||
for &(ty, ty_half) in &[(I128, I64), (I64, I32)] {
|
||||
let inst = uextend.bind(ty).bind(ty_half);
|
||||
{
|
||||
let inst = uextend.bind(I128).bind(I64);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
|
@ -222,12 +223,12 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] {
|
||||
let inst = sextend.bind(ty).bind(ty_half);
|
||||
{
|
||||
let inst = sextend.bind(I128).bind(I64);
|
||||
narrow.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number
|
||||
def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, 63))), // splat sign bit to whole number
|
||||
def!(a = iconcat(x, ah)),
|
||||
],
|
||||
);
|
||||
|
@ -267,45 +268,39 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
],
|
||||
);
|
||||
|
||||
for &ty in &[I128, I64] {
|
||||
let block = var("block");
|
||||
let block1 = var("block1");
|
||||
let block2 = var("block2");
|
||||
narrow.legalize(
|
||||
def!(brz.I128(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brz.ty(x, block, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(
|
||||
a = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xl,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(
|
||||
b = icmp_imm(
|
||||
Literal::enumerator_for(&imm.intcc, "eq"),
|
||||
xh,
|
||||
Literal::constant(&imm.imm64, 0)
|
||||
)
|
||||
),
|
||||
def!(c = band(a, b)),
|
||||
def!(brnz(c, block, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(brnz.ty(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
}
|
||||
narrow.legalize(
|
||||
def!(brnz.I128(x, block1, vararg)),
|
||||
vec![
|
||||
def!((xl, xh) = isplit(x)),
|
||||
def!(brnz(xl, block1, vararg)),
|
||||
def!(jump(block2, Literal::empty_vararg())),
|
||||
block!(block2),
|
||||
def!(brnz(xh, block1, vararg)),
|
||||
],
|
||||
);
|
||||
|
||||
narrow.legalize(
|
||||
def!(a = popcnt.I128(x)),
|
||||
|
@ -634,14 +629,6 @@ pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGro
|
|||
);
|
||||
}
|
||||
|
||||
for &(ty_half, ty) in &[(I64, I128), (I32, I64)] {
|
||||
let inst = ireduce.bind(ty_half).bind(ty);
|
||||
expand.legalize(
|
||||
def!(a = inst(x)),
|
||||
vec![def!((b, c) = isplit(x)), def!(a = copy(b))],
|
||||
);
|
||||
}
|
||||
|
||||
// Expand integer operations with carry for RISC architectures that don't have
|
||||
// the flags.
|
||||
let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult");
|
||||
|
|
|
@ -25,14 +25,11 @@ pub(crate) fn define() -> SettingGroup {
- `experimental_linear_scan` is an experimental linear scan allocator. It may take less
time to allocate registers, but generated code's quality may be inferior. As of
2020-04-17, it is still experimental and it should not be used in production settings.
- `experimental_linear_scan_checked` is the linear scan allocator with additional self
checks that may take some time to run, and thus these checks are disabled by default.
"#,
vec![
"backtracking",
"backtracking_checked",
"experimental_linear_scan",
"experimental_linear_scan_checked",
],
);
@ -1 +1 @@
{"files":{"Cargo.toml":"d3026bf5426d767b0b23f0a4f6272aaeb68f598a92f6c788c1f6948153fa63c3","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
{"files":{"Cargo.toml":"702a281a26cf7099e1b3ca5e8bea145c113f52242be4f1e7e5b06bf129092599","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"a410bc2f5dcbde499c0cd299c2620bc8111e3c5b3fccdd9e2d85caf3c24fdab3","src/condcodes.rs":"b8d433b2217b86e172d25b6c65a3ce0cc8ca221062cad1b28b0c78d2159fbda9","src/constant_hash.rs":"ffc619f45aad62c6fdcb83553a05879691a72e9a0103375b2d6cc12d52cf72d0","src/constants.rs":"fed03a10a6316e06aa174091db6e7d1fbb5f73c82c31193012ec5ab52f1c603a","src/isa/mod.rs":"428a950eca14acbe783899ccb1aecf15027f8cbe205578308ebde203d10535f3","src/isa/x86/encoding_bits.rs":"7e013fb804b13f9f83a0d517c6f5105856938d08ad378cc44a6fe6a59adef270","src/isa/x86/mod.rs":"01ef4e4d7437f938badbe2137892183c1ac684da0f68a5bec7e06aad34f43b9b","src/lib.rs":"91f26f998f11fb9cb74d2ec171424e29badd417beef023674850ace57149c656"},"package":null}
@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.64.0"
version = "0.63.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.64.0"
version = "0.63.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@ -13,27 +13,25 @@ build = "build.rs"
edition = "2018"

[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.64.0" }
cranelift-entity = { path = "../entity", version = "0.64.0" }
cranelift-bforest = { path = "../bforest", version = "0.64.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.63.0" }
cranelift-entity = { path = "../entity", version = "0.63.0" }
cranelift-bforest = { path = "../bforest", version = "0.63.0" }
hashbrown = { version = "0.7", optional = true }
target-lexicon = "0.10"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
gimli = { version = "0.21.0", default-features = false, features = ["write"], optional = true }
gimli = { version = "0.20.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.0.0" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.1.0" }
regalloc = "0.0.25"
regalloc = "0.0.21"
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accomodated in `tests`.

[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.64.0" }
peepmatic = { path = "../peepmatic", optional = true, version = "0.64.0" }
cranelift-codegen-meta = { path = "meta", version = "0.63.0" }

[features]
default = ["std", "unwind"]
@ -60,12 +58,10 @@ x86 = []
arm32 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.

# Option to enable all architectures.
all-arch = [
"x86",
"x64",
"arm32",
"arm64",
"riscv"
@ -74,12 +70,5 @@ all-arch = [
# For dependent crates that want to serialize some parts of cranelift
enable-serde = ["serde"]

# Recompile our optimizations that are written in the `peepmatic` DSL into a
# compact finite-state transducer automaton.
rebuild-peephole-optimizers = ["peepmatic"]

# Enable the use of `peepmatic`-generated peephole optimizers.
enable-peepmatic = ["peepmatic-runtime"]

[badges]
maintenance = { status = "experimental" }
@ -71,22 +71,4 @@ fn main() {
);
println!("cargo:warning=Generated files are in {}", out_dir);
}

#[cfg(feature = "rebuild-peephole-optimizers")]
rebuild_peephole_optimizers();
}

#[cfg(feature = "rebuild-peephole-optimizers")]
fn rebuild_peephole_optimizers() {
use std::path::Path;

let source_path = Path::new("src").join("preopt.peepmatic");
println!("cargo:rerun-if-changed={}", source_path.display());

let preopt =
peepmatic::compile_file(&source_path).expect("failed to compile `src/preopt.peepmatic`");

preopt
.serialize_to_file(&Path::new("src").join("preopt.serialized"))
.expect("failed to serialize peephole optimizer to `src/preopt.serialized`");
}
@ -54,9 +54,6 @@ pub enum ValueConversion {
|
|||
|
||||
/// Unsigned zero-extend value to the required type.
|
||||
Uext(Type),
|
||||
|
||||
/// Pass value by pointer of given integer type.
|
||||
Pointer(Type),
|
||||
}
|
||||
|
||||
impl ValueConversion {
|
||||
|
@ -66,7 +63,7 @@ impl ValueConversion {
|
|||
Self::IntSplit => ty.half_width().expect("Integer type too small to split"),
|
||||
Self::VectorSplit => ty.half_vector().expect("Not a vector"),
|
||||
Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
|
||||
Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty,
|
||||
Self::Sext(nty) | Self::Uext(nty) => nty,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,11 +74,6 @@ impl ValueConversion {
|
|||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a conversion to pointer?
|
||||
pub fn is_pointer(self) -> bool {
|
||||
matches!(self, Self::Pointer(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
|
||||
|
@ -118,16 +110,10 @@ pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<
|
|||
}
|
||||
// Split this argument into two smaller ones. Then revisit both.
|
||||
ArgAction::Convert(conv) => {
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"No more conversions allowed after conversion to pointer"
|
||||
);
|
||||
let value_type = conv.apply(arg.value_type);
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
args.to_mut()[argno].value_type = value_type;
|
||||
if conv.is_pointer() {
|
||||
args.to_mut()[argno].legalized_to_pointer = true;
|
||||
} else if conv.is_split() {
|
||||
let new_arg = AbiParam { value_type, ..arg };
|
||||
if conv.is_split() {
|
||||
args.to_mut().insert(argno + 1, new_arg);
|
||||
}
|
||||
}
|
||||
|
@ -166,10 +152,6 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
|||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
if arg.legalized_to_pointer {
|
||||
return ValueConversion::Pointer(arg.value_type);
|
||||
}
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
|
@ -244,12 +226,5 @@ mod tests {
|
|||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::IntBits
|
||||
);
|
||||
|
||||
// Value is passed by reference
|
||||
arg.legalized_to_pointer = true;
|
||||
assert_eq!(
|
||||
legalize_abi_value(types::F64, &arg),
|
||||
ValueConversion::Pointer(types::I32)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,7 @@ const NUM_BITS: usize = core::mem::size_of::<Num>() * 8;
/// The first value in the bitmap is of the lowest addressed slot on the stack.
/// As all stacks in Isa's supported by Cranelift grow down, this means that
/// first value is of the top of the stack and values proceed down the stack.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))]
#[derive(Clone, Debug)]
pub struct Stackmap {
bitmap: Vec<BitSet<Num>>,
mapped_words: u32,
@ -5,14 +5,12 @@
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.

use core::convert::{From, Into};
use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};

/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BitSet<T>(pub T);

impl<T> BitSet<T>
@ -27,7 +27,6 @@ use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
use crate::regalloc;
use crate::remove_constant_phis::do_remove_constant_phis;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
use crate::simple_gvn::do_simple_gvn;
@ -180,8 +179,6 @@ impl Context {
self.dce(isa)?;
}

self.remove_constant_phis(isa)?;

if let Some(backend) = isa.get_mach_backend() {
let result = backend.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
@ -227,7 +224,7 @@ impl Context {
let _tt = timing::binemit();
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
if let Some(ref result) = &self.mach_compile_result {
result.buffer.emit(&mut sink);
result.sections.emit(&mut sink);
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
}
@ -295,16 +292,6 @@ impl Context {
Ok(())
}

/// Perform constant-phi removal on the function.
pub fn remove_constant_phis<'a, FOI: Into<FlagsOrIsa<'a>>>(
&mut self,
fisa: FOI,
) -> CodegenResult<()> {
do_remove_constant_phis(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}

/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
do_preopt(&mut self.func, &mut self.cfg, isa);
@ -794,20 +794,15 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}

// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
// during lowering.
if self.isa.get_mach_backend().is_none() {
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}

&mut self.func.dfg
@ -40,24 +40,3 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
let opcode = data.opcode();
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}

/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load?
pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool {
has_side_effect(func, inst) || func.dfg[inst].opcode().can_load()
}

/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
/// represented in 64 bits?
pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
let data = &func.dfg[inst];
if data.opcode() == Opcode::Null {
return Some(0);
}
match data {
&InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
&InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
&InstructionData::UnaryBool { imm, .. } => Some(if imm { 1 } else { 0 }),
_ => None,
}
}
@ -234,7 +234,11 @@ impl DataFlowGraph {
|
|||
|
||||
/// Get the type of a value.
|
||||
pub fn value_type(&self, v: Value) -> Type {
|
||||
self.values[v].ty()
|
||||
match self.values[v] {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the definition of a value.
|
||||
|
@ -379,14 +383,9 @@ pub enum ValueDef {
|
|||
impl ValueDef {
|
||||
/// Unwrap the instruction where the value was defined, or panic.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().expect("Value is not an instruction result")
|
||||
}
|
||||
|
||||
/// Get the instruction where the value was defined, if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Result(inst, _) => Some(inst),
|
||||
_ => None,
|
||||
Self::Result(inst, _) => inst,
|
||||
_ => panic!("Value is not an instruction result"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -429,16 +428,6 @@ enum ValueData {
|
|||
Alias { ty: Type, original: Value },
|
||||
}
|
||||
|
||||
impl ValueData {
|
||||
fn ty(&self) -> Type {
|
||||
match *self {
|
||||
ValueData::Inst { ty, .. }
|
||||
| ValueData::Param { ty, .. }
|
||||
| ValueData::Alias { ty, .. } => ty,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Instructions.
|
||||
///
|
||||
impl DataFlowGraph {
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
|
||||
use crate::isa::{CallConv, RegInfo, RegUnit};
|
||||
use crate::machinst::RelocDistance;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt;
|
||||
use core::str::FromStr;
|
||||
|
@ -156,8 +155,6 @@ pub struct AbiParam {
|
|||
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
|
||||
/// been legalized.
|
||||
pub location: ArgumentLoc,
|
||||
/// Was the argument converted to pointer during legalization?
|
||||
pub legalized_to_pointer: bool,
|
||||
}
|
||||
|
||||
impl AbiParam {
|
||||
|
@ -168,7 +165,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose: ArgumentPurpose::Normal,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -179,7 +175,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: Default::default(),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -190,7 +185,6 @@ impl AbiParam {
|
|||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: ArgumentLoc::Reg(regunit),
|
||||
legalized_to_pointer: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -224,9 +218,6 @@ pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
|
|||
impl<'a> fmt::Display for DisplayAbiParam<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0.value_type)?;
|
||||
if self.0.legalized_to_pointer {
|
||||
write!(f, " ptr")?;
|
||||
}
|
||||
match self.0.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => write!(f, " uext")?,
|
||||
|
@ -375,16 +366,6 @@ pub struct ExtFuncData {
|
|||
/// Will this function be defined nearby, such that it will always be a certain distance away,
|
||||
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
|
||||
/// symbols meant to be preemptible cannot be considered colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range. The exact
|
||||
/// distance depends on the code model in use. Currently on AArch64, for example, Cranelift
|
||||
/// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how
|
||||
/// far away the target will be, it is best not to set the `colocated` flag; in general, this
|
||||
/// flag is best used when the target is known to be in the same unit of code generation, such
|
||||
/// as a Wasm module.
|
||||
///
|
||||
/// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A
|
||||
/// `colocated` flag value of `true` implies `RelocDistance::Near`.
|
||||
pub colocated: bool,
|
||||
}
|
||||
|
||||
|
@ -397,17 +378,6 @@ impl fmt::Display for ExtFuncData {
|
|||
}
|
||||
}
|
||||
|
||||
impl ExtFuncData {
|
||||
/// Return an estimate of the distance to the referred-to function symbol.
|
||||
pub fn reloc_distance(&self) -> RelocDistance {
|
||||
if self.colocated {
|
||||
RelocDistance::Near
|
||||
} else {
|
||||
RelocDistance::Far
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -423,8 +393,6 @@ mod tests {
|
|||
assert_eq!(t.sext().to_string(), "i32 sext");
|
||||
t.purpose = ArgumentPurpose::StructReturn;
|
||||
assert_eq!(t.to_string(), "i32 uext sret");
|
||||
t.legalized_to_pointer = true;
|
||||
assert_eq!(t.to_string(), "i32 ptr uext sret");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -308,30 +308,6 @@ impl Function {
// function, assume it is not a leaf.
self.dfg.signatures.is_empty()
}

/// Replace the `dst` instruction's data with the `src` instruction's data
/// and then remove `src`.
///
/// `src` and its result values should not be used at all, as any uses would
/// be left dangling after calling this method.
///
/// `src` and `dst` must have the same number of resulting values, and
/// `src`'s i^th value must have the same type as `dst`'s i^th value.
pub fn transplant_inst(&mut self, dst: Inst, src: Inst) {
debug_assert_eq!(
self.dfg.inst_results(dst).len(),
self.dfg.inst_results(src).len()
);
debug_assert!(self
.dfg
.inst_results(dst)
.iter()
.zip(self.dfg.inst_results(src))
.all(|(a, b)| self.dfg.value_type(*a) == self.dfg.value_type(*b)));

self.dfg[dst] = self.dfg[src].clone();
self.layout.remove_inst(src);
}
}

/// Additional annotations for function display.
@ -3,7 +3,6 @@
|
|||
use crate::ir::immediates::{Imm64, Offset32};
|
||||
use crate::ir::{ExternalName, GlobalValue, Type};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::machinst::RelocDistance;
|
||||
use core::fmt;
|
||||
|
||||
/// Information about a global value declaration.
|
||||
|
@ -63,10 +62,6 @@ pub enum GlobalValueData {
|
|||
/// Will this symbol be defined nearby, such that it will always be a certain distance
|
||||
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
|
||||
/// symbols meant to be preemptible cannot be colocated.
|
||||
///
|
||||
/// If `true`, some backends may use relocation forms that have limited range: for example,
|
||||
/// a +/- 2^27-byte range on AArch64. See the documentation for
|
||||
/// [`RelocDistance`](machinst::RelocDistance) for more details.
|
||||
colocated: bool,
|
||||
|
||||
/// Does this symbol refer to a thread local storage value?
|
||||
|
@ -90,20 +85,6 @@ impl GlobalValueData {
|
|||
Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this global references a symbol, return an estimate of the relocation distance,
|
||||
/// based on the `colocated` flag.
|
||||
pub fn maybe_reloc_distance(&self) -> Option<RelocDistance> {
|
||||
match self {
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: true, ..
|
||||
} => Some(RelocDistance::Near),
|
||||
&GlobalValueData::Symbol {
|
||||
colocated: false, ..
|
||||
} => Some(RelocDistance::Far),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GlobalValueData {
|
||||
|
|
|
@ -62,21 +62,6 @@ impl Imm64 {
|
|||
pub fn bits(&self) -> i64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Sign extend this immediate as if it were a signed integer of the given
|
||||
/// power-of-two width.
|
||||
pub fn sign_extend_from_width(&mut self, bit_width: u16) {
|
||||
debug_assert!(bit_width.is_power_of_two());
|
||||
|
||||
if bit_width >= 64 {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = bit_width as i64;
|
||||
let delta = 64 - bit_width;
|
||||
let sign_extended = (self.0 << delta) >> delta;
|
||||
*self = Imm64(sign_extended);
|
||||
}
|
||||
}
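// A standalone sketch of the shift trick used by `sign_extend_from_width`
// above (not the crate's `Imm64` itself): shifting left by `64 - bit_width`
// and then arithmetic-shifting back replicates the sign bit of the narrow
// value into the upper bits.
fn sign_extend_from_width(value: i64, bit_width: u16) -> i64 {
    debug_assert!(bit_width.is_power_of_two());
    if bit_width >= 64 {
        return value;
    }
    let delta = 64 - i64::from(bit_width);
    (value << delta) >> delta
}

fn main() {
    // 0xFF viewed as an 8-bit signed value is -1.
    assert_eq!(sign_extend_from_width(0xFF, 8), -1);
    // 0x7F has a clear sign bit at width 8, so it is unchanged.
    assert_eq!(sign_extend_from_width(0x7F, 8), 0x7F);
    // 0x8000 viewed as a 16-bit signed value is -32768.
    assert_eq!(sign_extend_from_width(0x8000, 16), -32768);
}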
|
||||
|
||||
impl Into<i64> for Imm64 {
|
||||
|
|
|
@ -11,7 +11,9 @@ use core::fmt::{self, Display, Formatter};
|
|||
use core::ops::{Deref, DerefMut};
|
||||
use core::str::FromStr;
|
||||
|
||||
use crate::ir::{self, trapcode::TrapCode, types, Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::ir;
|
||||
use crate::ir::types;
|
||||
use crate::ir::{Block, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use crate::isa;
|
||||
|
||||
use crate::bitset::BitSet;
|
||||
|
@ -255,30 +257,6 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get its trap code. Otherwise, return
|
||||
/// `None`.
|
||||
pub fn trap_code(&self) -> Option<TrapCode> {
|
||||
match *self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this is a trapping instruction, get an exclusive reference to its
|
||||
/// trap code. Otherwise, return `None`.
|
||||
pub fn trap_code_mut(&mut self) -> Option<&mut TrapCode> {
|
||||
match self {
|
||||
Self::CondTrap { code, .. }
|
||||
| Self::FloatCondTrap { code, .. }
|
||||
| Self::IntCondTrap { code, .. }
|
||||
| Self::Trap { code, .. } => Some(code),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return information about a call instruction.
|
||||
///
|
||||
/// Any instruction that can call another function reveals its call signature here.
|
||||
|
@ -296,39 +274,6 @@ impl InstructionData {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn sign_extend_immediates(&mut self, ctrl_typevar: Type) {
|
||||
if ctrl_typevar.is_invalid() {
|
||||
return;
|
||||
}
|
||||
|
||||
let bit_width = ctrl_typevar.bits();
|
||||
|
||||
match self {
|
||||
Self::BinaryImm64 {
|
||||
opcode,
|
||||
arg: _,
|
||||
imm,
|
||||
} => {
|
||||
if matches!(opcode, Opcode::SdivImm | Opcode::SremImm) {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
Self::IntCompareImm {
|
||||
opcode,
|
||||
arg: _,
|
||||
cond,
|
||||
imm,
|
||||
} => {
|
||||
debug_assert_eq!(*opcode, Opcode::IcmpImm);
|
||||
if cond.unsigned() != *cond {
|
||||
imm.sign_extend_from_width(bit_width);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
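// A rough standalone illustration (simplified names, not the crate's real
// `InstructionData`) of why `sign_extend_immediates` above only touches
// signed operations: for an 8-bit `icmp_imm` with a signed condition and the
// raw immediate 255, the value must be reinterpreted as -1 before forming a
// correct 64-bit signed comparison, while an unsigned condition wants 255.
fn sext(value: i64, bit_width: u32) -> i64 {
    let delta = 64 - bit_width;
    (value << delta) >> delta
}

fn immediate_for_compare(raw_imm: i64, bit_width: u32, signed_cond: bool) -> i64 {
    if signed_cond {
        sext(raw_imm, bit_width)
    } else {
        raw_imm
    }
}

fn main() {
    assert_eq!(immediate_for_compare(0xFF, 8, true), -1);   // signed view of 0xFF
    assert_eq!(immediate_for_compare(0xFF, 8, false), 0xFF); // unsigned view
}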
|
||||
|
||||
/// Information about branch and jump instructions.
|
||||
|
|
|
@ -24,20 +24,6 @@ pub enum LibCall {
|
|||
/// probe for stack overflow. These are emitted for functions which need them
|
||||
/// when the `enable_probestack` setting is true.
|
||||
Probestack,
|
||||
/// udiv.i64
|
||||
UdivI64,
|
||||
/// sdiv.i64
|
||||
SdivI64,
|
||||
/// urem.i64
|
||||
UremI64,
|
||||
/// srem.i64
|
||||
SremI64,
|
||||
/// ishl.i64
|
||||
IshlI64,
|
||||
/// ushr.i64
|
||||
UshrI64,
|
||||
/// sshr.i64
|
||||
SshrI64,
|
||||
/// ceil.f32
|
||||
CeilF32,
|
||||
/// ceil.f64
|
||||
|
@ -77,13 +63,6 @@ impl FromStr for LibCall {
|
|||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"Probestack" => Ok(Self::Probestack),
|
||||
"UdivI64" => Ok(Self::UdivI64),
|
||||
"SdivI64" => Ok(Self::SdivI64),
|
||||
"UremI64" => Ok(Self::UremI64),
|
||||
"SremI64" => Ok(Self::SremI64),
|
||||
"IshlI64" => Ok(Self::IshlI64),
|
||||
"UshrI64" => Ok(Self::UshrI64),
|
||||
"SshrI64" => Ok(Self::SshrI64),
|
||||
"CeilF32" => Ok(Self::CeilF32),
|
||||
"CeilF64" => Ok(Self::CeilF64),
|
||||
"FloorF32" => Ok(Self::FloorF32),
|
||||
|
@ -109,16 +88,6 @@ impl LibCall {
|
|||
/// Returns `None` if no well-known library routine name exists for that instruction.
|
||||
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
|
||||
Some(match ctrl_type {
|
||||
types::I64 => match opcode {
|
||||
Opcode::Udiv => Self::UdivI64,
|
||||
Opcode::Sdiv => Self::SdivI64,
|
||||
Opcode::Urem => Self::UremI64,
|
||||
Opcode::Srem => Self::SremI64,
|
||||
Opcode::Ishl => Self::IshlI64,
|
||||
Opcode::Ushr => Self::UshrI64,
|
||||
Opcode::Sshr => Self::SshrI64,
|
||||
_ => return None,
|
||||
},
|
||||
types::F32 => match opcode {
|
||||
Opcode::Ceil => Self::CeilF32,
|
||||
Opcode::Floor => Self::FloorF32,
|
||||
|
|
|
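// Standalone sketch (simplified enums, not the crate's `Opcode`/`Type`) of the
// opcode-plus-controlling-type lookup performed by `LibCall::for_inst` above:
// only a few (opcode, type) pairs have a well-known runtime routine; anything
// else yields `None` and must be lowered some other way.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Op { Udiv, Ceil, Iadd }

#[derive(Clone, Copy, PartialEq, Debug)]
enum Ty { I64, F32 }

#[derive(Debug, PartialEq)]
enum LibCallName { UdivI64, CeilF32 }

fn for_inst(opcode: Op, ctrl_type: Ty) -> Option<LibCallName> {
    Some(match (ctrl_type, opcode) {
        (Ty::I64, Op::Udiv) => LibCallName::UdivI64,
        (Ty::F32, Op::Ceil) => LibCallName::CeilF32,
        _ => return None,
    })
}

fn main() {
    assert_eq!(for_inst(Op::Udiv, Ty::I64), Some(LibCallName::UdivI64));
    assert_eq!(for_inst(Op::Ceil, Ty::F32), Some(LibCallName::CeilF32));
    assert_eq!(for_inst(Op::Iadd, Ty::I64), None);
}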
@ -27,6 +27,9 @@ pub enum TrapCode {
|
|||
/// A `table_addr` instruction detected an out-of-bounds error.
|
||||
TableOutOfBounds,
|
||||
|
||||
/// Other bounds checking error.
|
||||
OutOfBounds,
|
||||
|
||||
/// Indirect call to a null table entry.
|
||||
IndirectCallToNull,
|
||||
|
||||
|
@ -60,6 +63,7 @@ impl Display for TrapCode {
|
|||
StackOverflow => "stk_ovf",
|
||||
HeapOutOfBounds => "heap_oob",
|
||||
TableOutOfBounds => "table_oob",
|
||||
OutOfBounds => "oob",
|
||||
IndirectCallToNull => "icall_null",
|
||||
BadSignature => "bad_sig",
|
||||
IntegerOverflow => "int_ovf",
|
||||
|
@ -82,6 +86,7 @@ impl FromStr for TrapCode {
|
|||
"stk_ovf" => Ok(StackOverflow),
|
||||
"heap_oob" => Ok(HeapOutOfBounds),
|
||||
"table_oob" => Ok(TableOutOfBounds),
|
||||
"oob" => Ok(OutOfBounds),
|
||||
"icall_null" => Ok(IndirectCallToNull),
|
||||
"bad_sig" => Ok(BadSignature),
|
||||
"int_ovf" => Ok(IntegerOverflow),
|
||||
|
@ -101,10 +106,11 @@ mod tests {
|
|||
use alloc::string::ToString;
|
||||
|
||||
// Everything but user-defined codes.
|
||||
const CODES: [TrapCode; 10] = [
|
||||
const CODES: [TrapCode; 11] = [
|
||||
TrapCode::StackOverflow,
|
||||
TrapCode::HeapOutOfBounds,
|
||||
TrapCode::TableOutOfBounds,
|
||||
TrapCode::OutOfBounds,
|
||||
TrapCode::IndirectCallToNull,
|
||||
TrapCode::BadSignature,
|
||||
TrapCode::IntegerOverflow,
|
||||
|
|
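// A minimal sketch (own toy enum, not the crate's `TrapCode`) of the
// Display/FromStr round-trip that the test above relies on: every
// non-user-defined code must print to a mnemonic that parses back to itself,
// which is why adding `OutOfBounds`/"oob" also bumps the CODES array length.
use core::fmt;
use core::str::FromStr;

#[derive(Clone, Copy, Debug, PartialEq)]
enum Code { HeapOutOfBounds, OutOfBounds }

impl fmt::Display for Code {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Code::HeapOutOfBounds => "heap_oob",
            Code::OutOfBounds => "oob",
        })
    }
}

impl FromStr for Code {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, ()> {
        match s {
            "heap_oob" => Ok(Code::HeapOutOfBounds),
            "oob" => Ok(Code::OutOfBounds),
            _ => Err(()),
        }
    }
}

fn main() {
    for &c in &[Code::HeapOutOfBounds, Code::OutOfBounds] {
        assert_eq!(c.to_string().parse::<Code>(), Ok(c));
    }
}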
Diff between files not shown because of its large size.
|
@ -3,14 +3,14 @@
|
|||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
use crate::machinst::MachLabel;
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, Writable};
|
||||
|
||||
use core::convert::Into;
|
||||
use core::convert::{Into, TryFrom};
|
||||
use std::string::String;
|
||||
|
||||
/// A shift operator for a register or immediate.
|
||||
|
@ -112,9 +112,7 @@ pub enum MemLabel {
|
|||
/// A memory argument to load/store, encapsulating the possible addressing modes.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum MemArg {
|
||||
//
|
||||
// Real ARM64 addressing modes:
|
||||
//
|
||||
Label(MemLabel),
|
||||
/// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
|
||||
PostIndexed(Writable<Reg>, SImm9),
|
||||
/// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
|
||||
|
@ -139,35 +137,11 @@ pub enum MemArg {
|
|||
/// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
|
||||
UnsignedOffset(Reg, UImm12Scaled),
|
||||
|
||||
//
|
||||
// virtual addressing modes that are lowered at emission time:
|
||||
//
|
||||
/// Reference to a "label": e.g., a symbol.
|
||||
Label(MemLabel),
|
||||
/// Offset from the stack pointer. Lowered into a real amode at emission.
|
||||
SPOffset(i64),
|
||||
|
||||
/// Arbitrary offset from a register. Converted to generation of large
|
||||
/// offsets with multiple instructions as necessary during code emission.
|
||||
RegOffset(Reg, i64, Type),
|
||||
|
||||
/// Offset from the stack pointer.
|
||||
SPOffset(i64, Type),
|
||||
|
||||
/// Offset from the frame pointer.
|
||||
FPOffset(i64, Type),
|
||||
|
||||
/// Offset from the "nominal stack pointer", which is where the real SP is
|
||||
/// just after stack and spill slots are allocated in the function prologue.
|
||||
/// At emission time, this is converted to `SPOffset` with a fixup added to
|
||||
/// the offset constant. The fixup is a running value that is tracked as
|
||||
/// emission iterates through instructions in linear order, and can be
|
||||
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
|
||||
///
|
||||
/// The standard ABI is in charge of handling this (by emitting the
|
||||
/// adjustment meta-instructions). It maintains the invariant that "nominal
|
||||
/// SP" is where the actual SP is after the function prologue and before
|
||||
/// clobber pushes. See the diagram in the documentation for
|
||||
/// [crate::isa::aarch64::abi](the ABI module) for more details.
|
||||
NominalSPOffset(i64, Type),
|
||||
/// Offset from the frame pointer. Lowered into a real amode at emission.
|
||||
FPOffset(i64),
|
||||
}
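// Not the actual lowering code: a small sketch of the "nominal SP"
// bookkeeping described in the comment above. The emitter keeps a running
// `virtual_sp_offset`; a NominalSPOffset-style amode becomes a real
// SP-relative offset by adding that running value at emission time.
struct EmitState {
    virtual_sp_offset: i64,
}

fn real_sp_offset(nominal_off: i64, state: &EmitState) -> i64 {
    nominal_off + state.virtual_sp_offset
}

fn main() {
    let mut state = EmitState { virtual_sp_offset: 0 };
    // Suppose the stack pointer was moved down 16 bytes for outgoing
    // arguments and this was recorded with a VirtualSPOffsetAdj-style
    // adjustment of +16 (the usual convention).
    state.virtual_sp_offset += 16;
    // A slot at nominal-SP+8 is now at real-SP+24.
    assert_eq!(real_sp_offset(8, &state), 24);
}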
|
||||
|
||||
impl MemArg {
|
||||
|
@ -178,6 +152,17 @@ impl MemArg {
|
|||
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
|
||||
}
|
||||
|
||||
/// Memory reference using an address in a register and an offset, if possible.
|
||||
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
|
||||
Some(MemArg::Unscaled(reg, simm9))
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
|
||||
Some(MemArg::UnsignedOffset(reg, uimm12s))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
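// Standalone sketch of the offset-fitting rule used by `reg_maybe_offset`
// above (the ranges are the architectural ones; the SImm9/UImm12Scaled types
// themselves are not reproduced here): an unscaled signed 9-bit offset covers
// -256..=255, and the scaled unsigned 12-bit form covers 0..=4095 multiples
// of the access size. Anything else needs extra instructions.
#[derive(Debug, PartialEq)]
enum Fit {
    Unscaled(i64),
    UnsignedScaled(i64), // stored as the scaled index
    NeedsExtraInsts,
}

fn classify(offset: i64, access_bytes: i64) -> Fit {
    if (-256..=255).contains(&offset) {
        Fit::Unscaled(offset)
    } else if offset >= 0 && offset % access_bytes == 0 && offset / access_bytes <= 4095 {
        Fit::UnsignedScaled(offset / access_bytes)
    } else {
        Fit::NeedsExtraInsts
    }
}

fn main() {
    assert_eq!(classify(8, 8), Fit::Unscaled(8));                 // e.g. ldur x1, [x7, #8]
    assert_eq!(classify(1024, 8), Fit::UnsignedScaled(128));      // e.g. ldr x1, [x7, #1024]
    assert_eq!(classify(1_048_576, 8), Fit::NeedsExtraInsts);     // 2^20: too large
}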
|
||||
|
||||
/// Memory reference using the sum of two registers as an address.
|
||||
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
|
||||
MemArg::RegReg(reg1, reg2)
|
||||
|
@ -296,44 +281,78 @@ impl CondBrKind {
|
|||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a Label, as passed into
|
||||
/// An unresolved reference to a BlockIndex, as passed into
|
||||
/// `lower_branch_group()`.
|
||||
Label(MachLabel),
|
||||
/// A fixed PC offset.
|
||||
ResolvedOffset(i32),
|
||||
Block(BlockIndex),
|
||||
/// A resolved reference to another instruction, after
|
||||
/// `Inst::with_block_offsets()`.
|
||||
ResolvedOffset(isize),
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Return the target's label, if it is a label-based target.
|
||||
pub fn as_label(self) -> Option<MachLabel> {
|
||||
/// Lower the branch target given offsets of each block.
|
||||
pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) {
|
||||
match self {
|
||||
BranchTarget::Label(l) => Some(l),
|
||||
&mut BranchTarget::Block(bix) => {
|
||||
let bix = usize::try_from(bix).unwrap();
|
||||
assert!(bix < targets.len());
|
||||
let block_offset_in_func = targets[bix];
|
||||
let branch_offset = (block_offset_in_func as isize) - (my_offset as isize);
|
||||
*self = BranchTarget::ResolvedOffset(branch_offset);
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(..) => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the block index.
|
||||
pub fn as_block_index(&self) -> Option<BlockIndex> {
|
||||
match self {
|
||||
&BranchTarget::Block(bix) => Some(bix),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset19_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
/// Get the offset as 4-byte words. Returns `0` if not
|
||||
/// yet resolved (in that case, we're only computing
|
||||
/// size and the offset doesn't matter).
|
||||
pub fn as_offset_words(&self) -> isize {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x3ffff);
|
||||
assert!(off >= -0x40000);
|
||||
(off as u32) & 0x7ffff
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_offset26_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 2,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x1ffffff);
|
||||
assert!(off >= -0x2000000);
|
||||
(off as u32) & 0x3ffffff
|
||||
/// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow.
|
||||
pub fn as_off26(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 25)) && (off >= -(1 << 25)) {
|
||||
Some((off as u32) & ((1 << 26) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a 19-bit offset, or `None` if overflow.
|
||||
pub fn as_off19(&self) -> Option<u32> {
|
||||
let off = self.as_offset_words();
|
||||
if (off < (1 << 18)) && (off >= -(1 << 18)) {
|
||||
Some((off as u32) & ((1 << 19) - 1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Map the block index given a transform map.
|
||||
pub fn map(&mut self, block_index_map: &[BlockIndex]) {
|
||||
match self {
|
||||
&mut BranchTarget::Block(ref mut bix) => {
|
||||
let n = block_index_map[usize::try_from(*bix).unwrap()];
|
||||
*bix = n;
|
||||
}
|
||||
&mut BranchTarget::ResolvedOffset(_) => {}
|
||||
}
|
||||
}
|
||||
}
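// A standalone sketch of the range checks in `as_off19`/`as_off26` above.
// Branch offsets are encoded in 4-byte instruction words, so a 19-bit field
// reaches roughly +/- 1 MiB and a 26-bit field roughly +/- 128 MiB.
fn encode_branch_offset(byte_offset: isize, field_bits: u32) -> Option<u32> {
    let words = byte_offset >> 2;
    let limit: isize = 1 << (field_bits - 1);
    if words >= -limit && words < limit {
        Some((words as u32) & ((1u32 << field_bits) - 1))
    } else {
        None
    }
}

fn main() {
    // A conditional branch (19-bit field) can reach 64 bytes ahead...
    assert_eq!(encode_branch_offset(64, 19), Some(16));
    // ...but not 2 MiB ahead; that needs the 26-bit unconditional form.
    assert_eq!(encode_branch_offset(2 << 20, 19), None);
    assert_eq!(encode_branch_offset(2 << 20, 26), Some((2 << 20) >> 2));
}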
|
||||
|
||||
|
@ -424,11 +443,8 @@ impl ShowWithRRU for MemArg {
|
|||
simm9.show_rru(mb_rru)
|
||||
),
|
||||
// Eliminated by `mem_finalize()`.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..)
|
||||
| &MemArg::RegOffset(..) => {
|
||||
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Unexpected stack-offset mem-arg mode!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -469,21 +485,18 @@ impl ShowWithRRU for Cond {
|
|||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
|
||||
&BranchTarget::Block(block) => format!("block{}", block),
|
||||
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
|
||||
/// 64-bit variants of many instructions (and integer and floating-point registers) and 128-bit
|
||||
/// variants of vector instructions.
|
||||
/// TODO: Create a separate type for SIMD & floating-point operands.
|
||||
/// 64-bit variants of many instructions (and integer registers).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum InstSize {
|
||||
Size32,
|
||||
Size64,
|
||||
Size128,
|
||||
}
|
||||
|
||||
impl InstSize {
|
||||
|
@ -506,13 +519,11 @@ impl InstSize {
|
|||
/// Convert from a needed width to the smallest size that fits.
|
||||
pub fn from_bits<I: Into<usize>>(bits: I) -> InstSize {
|
||||
let bits: usize = bits.into();
|
||||
assert!(bits <= 128);
|
||||
assert!(bits <= 64);
|
||||
if bits <= 32 {
|
||||
InstSize::Size32
|
||||
} else if bits <= 64 {
|
||||
InstSize::Size64
|
||||
} else {
|
||||
InstSize::Size128
|
||||
InstSize::Size64
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -521,12 +532,11 @@ impl InstSize {
|
|||
Self::from_bits(ty_bits(ty))
|
||||
}
|
||||
|
||||
/// Convert to I32, I64, or I128.
|
||||
/// Convert to I32 or I64.
|
||||
pub fn to_ty(self) -> Type {
|
||||
match self {
|
||||
InstSize::Size32 => I32,
|
||||
InstSize::Size64 => I64,
|
||||
InstSize::Size128 => I128,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -534,9 +544,6 @@ impl InstSize {
|
|||
match self {
|
||||
InstSize::Size32 => 0,
|
||||
InstSize::Size64 => 1,
|
||||
_ => {
|
||||
panic!("Unexpected size");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
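// Small standalone sketch (not the crate's `InstSize`) of the width rounding
// and the 0/1 size-bit selection shown above: anything up to 32 bits uses the
// 32-bit register forms (size bit 0), wider values use the 64-bit forms
// (size bit 1).
fn operand_size_bits(needed_bits: usize) -> u32 {
    assert!(needed_bits <= 64);
    if needed_bits <= 32 { 32 } else { 64 }
}

fn size_bit(size_bits: u32) -> u32 {
    match size_bits {
        32 => 0,
        64 => 1,
        _ => panic!("unexpected size"),
    }
}

fn main() {
    assert_eq!(operand_size_bits(1), 32); // a 1-bit value still uses a 32-bit form
    assert_eq!(size_bit(operand_size_bits(64)), 1);
}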
|
||||
|
|
|
@ -4,13 +4,12 @@ use crate::binemit::{CodeOffset, Reloc};
|
|||
use crate::ir::constant::ConstantData;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::TrapCode;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
use crate::isa::aarch64::{inst::regs::PINNED_REG, inst::*};
|
||||
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use log::debug;
|
||||
|
||||
/// Memory label/reference finalization: convert a MemLabel to a PC-relative
|
||||
/// offset, possibly emitting relocation(s) as necessary.
|
||||
|
@ -24,67 +23,43 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
|
|||
/// generic arbitrary stack offset) into real addressing modes, possibly by
|
||||
/// emitting some helper instructions that come immediately before the use
|
||||
/// of this amode.
|
||||
pub fn mem_finalize(
|
||||
insn_off: CodeOffset,
|
||||
mem: &MemArg,
|
||||
state: &EmitState,
|
||||
) -> (SmallVec<[Inst; 4]>, MemArg) {
|
||||
pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
|
||||
match mem {
|
||||
&MemArg::RegOffset(_, off, ty)
|
||||
| &MemArg::SPOffset(off, ty)
|
||||
| &MemArg::FPOffset(off, ty)
|
||||
| &MemArg::NominalSPOffset(off, ty) => {
|
||||
&MemArg::SPOffset(off) | &MemArg::FPOffset(off) => {
|
||||
let basereg = match mem {
|
||||
&MemArg::RegOffset(reg, _, _) => reg,
|
||||
&MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
|
||||
&MemArg::SPOffset(..) => stack_reg(),
|
||||
&MemArg::FPOffset(..) => fp_reg(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let adj = match mem {
|
||||
&MemArg::NominalSPOffset(..) => {
|
||||
debug!(
|
||||
"mem_finalize: nominal SP offset {} + adj {} -> {}",
|
||||
off,
|
||||
state.virtual_sp_offset,
|
||||
off + state.virtual_sp_offset
|
||||
);
|
||||
state.virtual_sp_offset
|
||||
}
|
||||
_ => 0,
|
||||
};
|
||||
let off = off + adj;
|
||||
|
||||
if let Some(simm9) = SImm9::maybe_from_i64(off) {
|
||||
let mem = MemArg::Unscaled(basereg, simm9);
|
||||
(smallvec![], mem)
|
||||
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
|
||||
let mem = MemArg::UnsignedOffset(basereg, uimm12s);
|
||||
(smallvec![], mem)
|
||||
(vec![], mem)
|
||||
} else {
|
||||
// In an addition, x31 is the zero register, not sp; we have only one temporary
|
||||
// so we can't do the proper add here.
|
||||
debug_assert_ne!(
|
||||
basereg,
|
||||
stack_reg(),
|
||||
"should have diverted SP before mem_finalize"
|
||||
);
|
||||
|
||||
let tmp = writable_spilltmp_reg();
|
||||
let mut const_insts = Inst::load_constant(tmp, off as u64);
|
||||
// N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
|
||||
// (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
|
||||
// is a valid base (for SPOffset) which we must handle here.
|
||||
// Also, SP needs to be the first arg, not second.
|
||||
let add_inst = Inst::AluRRRExtend {
|
||||
let add_inst = Inst::AluRRR {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: tmp,
|
||||
rn: basereg,
|
||||
rm: tmp.to_reg(),
|
||||
extendop: ExtendOp::UXTX,
|
||||
rn: tmp.to_reg(),
|
||||
rm: basereg,
|
||||
};
|
||||
const_insts.push(add_inst);
|
||||
(const_insts, MemArg::reg(tmp.to_reg()))
|
||||
(const_insts.to_vec(), MemArg::reg(tmp.to_reg()))
|
||||
}
|
||||
}
|
||||
|
||||
&MemArg::Label(ref label) => {
|
||||
let off = memlabel_finalize(insn_off, label);
|
||||
(smallvec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
(vec![], MemArg::Label(MemLabel::PCRel(off)))
|
||||
}
|
||||
|
||||
_ => (smallvec![], mem.clone()),
|
||||
_ => (vec![], mem.clone()),
|
||||
}
|
||||
}
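// A simplified, self-contained sketch of the decision made by `mem_finalize`
// above (registers, types, and instruction construction omitted): a pseudo
// stack-offset amode either collapses to a single real addressing mode, or
// requires the offset to be materialized into a temporary and added to the
// base register first.
#[derive(Debug, PartialEq)]
enum Lowered {
    Unscaled { off: i64 },          // fits the signed 9-bit form
    UnsignedScaled { scaled: i64 }, // fits the scaled unsigned 12-bit form
    ViaTemp { constant: u64 },      // load constant into tmp, then add to base
}

fn finalize_stack_amode(off: i64, virtual_sp_adj: i64, access_bytes: i64) -> Lowered {
    let off = off + virtual_sp_adj; // nominal-SP fixup, if any
    if (-256..=255).contains(&off) {
        Lowered::Unscaled { off }
    } else if off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095 {
        Lowered::UnsignedScaled { scaled: off / access_bytes }
    } else {
        Lowered::ViaTemp { constant: off as u64 }
    }
}

fn main() {
    assert_eq!(finalize_stack_amode(8, 0, 8), Lowered::Unscaled { off: 8 });
    assert_eq!(finalize_stack_amode(1000, 24, 8), Lowered::UnsignedScaled { scaled: 128 });
    // 32768 / 8 = 4096 exceeds the 12-bit field, so a movz + add is needed,
    // as in the FPOffset(32768) test case later in this commit.
    assert_eq!(finalize_stack_amode(32768, 0, 8), Lowered::ViaTemp { constant: 32768 });
}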
|
||||
|
||||
|
@ -98,12 +73,12 @@ pub fn u64_constant(bits: u64) -> ConstantData {
|
|||
// Instructions and subcomponents: emission
|
||||
|
||||
fn machreg_to_gpr(m: Reg) -> u32 {
|
||||
assert_eq!(m.get_class(), RegClass::I64);
|
||||
assert!(m.get_class() == RegClass::I64);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
fn machreg_to_vec(m: Reg) -> u32 {
|
||||
assert_eq!(m.get_class(), RegClass::V128);
|
||||
assert!(m.get_class() == RegClass::V128);
|
||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||
}
|
||||
|
||||
|
@ -162,14 +137,6 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
|
|||
(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
|
||||
}
|
||||
|
||||
fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
|
||||
CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
|
||||
}
|
||||
}
|
||||
|
||||
const MOVE_WIDE_FIXED: u32 = 0x92800000;
|
||||
|
||||
#[repr(u32)]
|
||||
|
@ -308,8 +275,8 @@ fn enc_ccmp_imm(size: InstSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) ->
|
|||
}
|
||||
|
||||
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert!(!is_16b); // to be supported later.
|
||||
0b00001110_101_00000_00011_1_00000_00000
|
||||
| ((is_16b as u32) << 30)
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
| (machreg_to_vec(rn) << 16)
|
||||
| (machreg_to_vec(rn) << 5)
|
||||
|
@ -355,29 +322,8 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
|||
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||
let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
|
||||
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
virtual_sp_offset: i64,
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = EmitState;
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut EmitState) {
|
||||
// N.B.: we *must* not exceed the "worst-case size" used to compute
|
||||
// where to insert islands, except when islands are explicitly triggered
|
||||
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
|
||||
// to allow disabling the check for `JTSequence`, which is always
|
||||
// emitted following an `EmitIsland`.
|
||||
let mut start_off = sink.cur_offset();
|
||||
|
||||
impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
|
||||
fn emit(&self, sink: &mut O, flags: &settings::Flags) {
|
||||
match self {
|
||||
&Inst::AluRRR { alu_op, rd, rn, rm } => {
|
||||
let top11 = match alu_op {
|
||||
|
@ -650,10 +596,10 @@ impl MachInstEmit for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
|
||||
// ldst encoding helpers take Reg, not Writable<Reg>.
|
||||
|
@ -662,17 +608,17 @@ impl MachInstEmit for Inst {
|
|||
// This is the base opcode (top 10 bits) for the "unscaled
|
||||
// immediate" form (Unscaled). Other addressing modes will OR in
|
||||
// other values for bits 24/25 (bits 1/2 of this constant).
|
||||
let (op, bits) = match self {
|
||||
&Inst::ULoad8 { .. } => (0b0011100001, 8),
|
||||
&Inst::SLoad8 { .. } => (0b0011100010, 8),
|
||||
&Inst::ULoad16 { .. } => (0b0111100001, 16),
|
||||
&Inst::SLoad16 { .. } => (0b0111100010, 16),
|
||||
&Inst::ULoad32 { .. } => (0b1011100001, 32),
|
||||
&Inst::SLoad32 { .. } => (0b1011100010, 32),
|
||||
&Inst::ULoad64 { .. } => (0b1111100001, 64),
|
||||
&Inst::FpuLoad32 { .. } => (0b1011110001, 32),
|
||||
&Inst::FpuLoad64 { .. } => (0b1111110001, 64),
|
||||
&Inst::FpuLoad128 { .. } => (0b0011110011, 128),
|
||||
let op = match self {
|
||||
&Inst::ULoad8 { .. } => 0b0011100001,
|
||||
&Inst::SLoad8 { .. } => 0b0011100010,
|
||||
&Inst::ULoad16 { .. } => 0b0111100001,
|
||||
&Inst::SLoad16 { .. } => 0b0111100010,
|
||||
&Inst::ULoad32 { .. } => 0b1011100001,
|
||||
&Inst::SLoad32 { .. } => 0b1011100010,
|
||||
&Inst::ULoad64 { .. } => 0b1111100001,
|
||||
&Inst::FpuLoad32 { .. } => 0b1011110001,
|
||||
&Inst::FpuLoad64 { .. } => 0b1111110001,
|
||||
&Inst::FpuLoad128 { .. } => 0b0011110011,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -686,9 +632,6 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -697,7 +640,19 @@ impl MachInstEmit for Inst {
|
|||
));
|
||||
}
|
||||
&MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => {
|
||||
assert_eq!(bits, ty_bits(ty));
|
||||
match (ty, self) {
|
||||
(I8, &Inst::ULoad8 { .. }) => {}
|
||||
(I8, &Inst::SLoad8 { .. }) => {}
|
||||
(I16, &Inst::ULoad16 { .. }) => {}
|
||||
(I16, &Inst::SLoad16 { .. }) => {}
|
||||
(I32, &Inst::ULoad32 { .. }) => {}
|
||||
(I32, &Inst::SLoad32 { .. }) => {}
|
||||
(I64, &Inst::ULoad64 { .. }) => {}
|
||||
(F32, &Inst::FpuLoad32 { .. }) => {}
|
||||
(F64, &Inst::FpuLoad64 { .. }) => {}
|
||||
(I128, &Inst::FpuLoad128 { .. }) => {}
|
||||
_ => panic!("Mismatching reg-scaling type in MemArg"),
|
||||
}
|
||||
let extendop = match &mem {
|
||||
&MemArg::RegScaled(..) => None,
|
||||
&MemArg::RegScaledExtended(_, _, _, op) => Some(op),
|
||||
|
@ -742,10 +697,9 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -785,20 +739,20 @@ impl MachInstEmit for Inst {
|
|||
ref mem,
|
||||
srcloc,
|
||||
} => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem);
|
||||
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
|
||||
let (op, bits) = match self {
|
||||
&Inst::Store8 { .. } => (0b0011100000, 8),
|
||||
&Inst::Store16 { .. } => (0b0111100000, 16),
|
||||
&Inst::Store32 { .. } => (0b1011100000, 32),
|
||||
&Inst::Store64 { .. } => (0b1111100000, 64),
|
||||
&Inst::FpuStore32 { .. } => (0b1011110000, 32),
|
||||
&Inst::FpuStore64 { .. } => (0b1111110000, 64),
|
||||
&Inst::FpuStore128 { .. } => (0b0011110010, 128),
|
||||
let op = match self {
|
||||
&Inst::Store8 { .. } => 0b0011100000,
|
||||
&Inst::Store16 { .. } => 0b0111100000,
|
||||
&Inst::Store32 { .. } => 0b1011100000,
|
||||
&Inst::Store64 { .. } => 0b1111100000,
|
||||
&Inst::FpuStore32 { .. } => 0b1011110000,
|
||||
&Inst::FpuStore64 { .. } => 0b1111110000,
|
||||
&Inst::FpuStore128 { .. } => 0b0011110010,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -812,9 +766,6 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
|
||||
}
|
||||
&MemArg::UnsignedOffset(reg, uimm12scaled) => {
|
||||
if uimm12scaled.value() != 0 {
|
||||
assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
|
||||
}
|
||||
sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
|
||||
}
|
||||
&MemArg::RegReg(r1, r2) => {
|
||||
|
@ -843,10 +794,9 @@ impl MachInstEmit for Inst {
|
|||
sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
|
||||
}
|
||||
// Eliminated by `mem_finalize()` above.
|
||||
&MemArg::SPOffset(..)
|
||||
| &MemArg::FPOffset(..)
|
||||
| &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
|
||||
&MemArg::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
|
||||
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) => {
|
||||
panic!("Should not see stack-offset here!")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -933,9 +883,6 @@ impl MachInstEmit for Inst {
|
|||
&Inst::FpuMove64 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
|
||||
}
|
||||
&Inst::FpuMove128 { rd, rn } => {
|
||||
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
|
||||
}
|
||||
&Inst::FpuRR { fpu_op, rd, rn } => {
|
||||
let top22 = match fpu_op {
|
||||
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
|
||||
|
@ -966,44 +913,6 @@ impl MachInstEmit for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||
}
|
||||
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
|
||||
FPUOpRI::UShr32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::UShr64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli64(imm) => {
|
||||
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b01_1_111110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
FPUOpRI::Sli32(imm) => {
|
||||
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||
sink.put4(
|
||||
0b0_0_1_011110_0000000_010101_00000_00000
|
||||
| imm.enc() << 16
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
)
|
||||
}
|
||||
},
|
||||
&Inst::FpuRRRR {
|
||||
fpu_op,
|
||||
rd,
|
||||
|
@ -1017,15 +926,6 @@ impl MachInstEmit for Inst {
|
|||
};
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let bits_12_16 = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(I8X16, ty);
|
||||
0b00101
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::FpuCmp32 { rn, rm } => {
|
||||
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
|
||||
}
|
||||
|
@ -1080,11 +980,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(8),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put4(const_data.to_bits());
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
|
@ -1093,29 +993,13 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put8(const_data.to_bits());
|
||||
}
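// Not part of the emitter: a tiny arithmetic check of the inline-literal
// pattern used by LoadFpuConst32/64 above. The emitted sequence is
//   [pc+0] load rd, [pc+8]
//   [pc+4] b <past the data>
//   [pc+8] the literal bytes
// so the branch needs to cover its own 4 bytes plus the data, which is
// consistent with the f32 case jumping 8 and the f64 case jumping 12.
fn literal_branch_offset(data_bytes: u32) -> u32 {
    4 + data_bytes
}

fn main() {
    assert_eq!(literal_branch_offset(4), 8);   // LoadFpuConst32
    assert_eq!(literal_branch_offset(8), 12);  // LoadFpuConst64 / LoadConst64
    assert_eq!(literal_branch_offset(16), 20); // LoadFpuConst128
}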
|
||||
&Inst::LoadFpuConst128 { rd, const_data } => {
|
||||
let inst = Inst::FpuLoad128 {
|
||||
rd,
|
||||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None,
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(20),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
for i in const_data.to_le_bytes().iter() {
|
||||
sink.put1(*i);
|
||||
}
|
||||
}
|
||||
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
|
||||
sink.put4(enc_fcsel(rd, rn, rm, cond, InstSize::Size32));
|
||||
}
|
||||
|
@ -1149,40 +1033,12 @@ impl MachInstEmit for Inst {
|
|||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
} => {
|
||||
let enc_size_for_cmp = match ty {
|
||||
I8X16 => 0b00,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
&Inst::VecRRR { rd, rn, rm, alu_op } => {
|
||||
let (top11, bit15_10) = match alu_op {
|
||||
VecALUOp::SQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::SQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::UQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::UQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
|
||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011),
|
||||
VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011),
|
||||
VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011),
|
||||
VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011),
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
@ -1228,7 +1084,7 @@ impl MachInstEmit for Inst {
|
|||
if top22 != 0 {
|
||||
sink.put4(enc_extend(top22, rd, rn));
|
||||
} else {
|
||||
Inst::mov32(rd, rn).emit(sink, flags, state);
|
||||
Inst::mov32(rd, rn).emit(sink, flags);
|
||||
}
|
||||
}
|
||||
&Inst::Extend {
|
||||
|
@ -1251,7 +1107,7 @@ impl MachInstEmit for Inst {
|
|||
rn: zero_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
sub_inst.emit(sink, flags, state);
|
||||
sub_inst.emit(sink, flags);
|
||||
}
|
||||
&Inst::Extend {
|
||||
rd,
|
||||
|
@ -1271,14 +1127,10 @@ impl MachInstEmit for Inst {
|
|||
panic!("Unsupported extend variant");
|
||||
}
|
||||
&Inst::Jump { ref dest } => {
|
||||
let off = sink.cur_offset();
|
||||
// Indicate that the jump uses a label, if so, so that a fixup can occur later.
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(off, off + 4, l);
|
||||
}
|
||||
// Emit the jump itself.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
|
||||
// TODO: differentiate between as_off26() returning `None` for
|
||||
// out-of-range vs. not-yet-finalized. The latter happens when we
|
||||
// do early (fake) emission for size computation.
|
||||
sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap()));
|
||||
}
|
||||
&Inst::Ret => {
|
||||
sink.put4(0xd65f03c0);
|
||||
|
@ -1286,47 +1138,71 @@ impl MachInstEmit for Inst {
|
|||
&Inst::EpiloguePlaceholder => {
|
||||
// Noop; this is just a placeholder for epilogues.
|
||||
}
|
||||
&Inst::Call { ref info } => {
|
||||
sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0);
|
||||
&Inst::Call {
|
||||
ref dest,
|
||||
loc,
|
||||
opcode,
|
||||
..
|
||||
} => {
|
||||
sink.add_reloc(loc, Reloc::Arm64Call, dest, 0);
|
||||
sink.put4(enc_jump26(0b100101, 0));
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CallInd { ref info } => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
|
||||
if info.opcode.is_call() {
|
||||
sink.add_call_site(info.loc, info.opcode);
|
||||
&Inst::CallInd {
|
||||
rn, loc, opcode, ..
|
||||
} => {
|
||||
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
|
||||
if opcode.is_call() {
|
||||
sink.add_call_site(loc, opcode);
|
||||
}
|
||||
}
|
||||
&Inst::CondBr {
|
||||
&Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"),
|
||||
&Inst::CondBrLowered { target, kind } => match kind {
|
||||
// TODO: handle >2^19 case by emitting a compound sequence with
|
||||
// an unconditional (26-bit) branch. We need branch-relaxation
|
||||
// adjustment machinery to enable this (because we don't want to
|
||||
// always emit the long form).
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
target.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
},
|
||||
&Inst::CondBrLoweredCompound {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
} => {
|
||||
// Conditional part first.
|
||||
let cond_off = sink.cur_offset();
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
|
||||
let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
|
||||
sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
|
||||
match kind {
|
||||
CondBrKind::Zero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::NotZero(reg) => {
|
||||
sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg));
|
||||
}
|
||||
CondBrKind::Cond(c) => {
|
||||
sink.put4(enc_cbr(
|
||||
0b01010100,
|
||||
taken.as_off19().unwrap_or(0),
|
||||
0b0,
|
||||
c.bits(),
|
||||
));
|
||||
}
|
||||
}
|
||||
sink.put4(enc_conditional_br(taken, kind));
|
||||
|
||||
// Unconditional part next.
|
||||
let uncond_off = sink.cur_offset();
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
|
||||
sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
|
||||
}
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
|
||||
}
|
||||
&Inst::OneWayCondBr { target, kind } => {
|
||||
let off = sink.cur_offset();
|
||||
if let Some(l) = target.as_label() {
|
||||
sink.use_label_at_offset(off, l, LabelUse::Branch19);
|
||||
}
|
||||
sink.put4(enc_conditional_br(target, kind));
|
||||
// Unconditional part.
|
||||
sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0)));
|
||||
}
|
||||
&Inst::IndirectBr { rn, .. } => {
|
||||
sink.put4(enc_br(rn));
|
||||
|
@ -1343,7 +1219,8 @@ impl MachInstEmit for Inst {
|
|||
sink.add_trap(srcloc, code);
|
||||
sink.put4(0xd4a00000);
|
||||
}
|
||||
&Inst::Adr { rd, off } => {
|
||||
&Inst::Adr { rd, ref label } => {
|
||||
let off = memlabel_finalize(sink.cur_offset_from_start(), label);
|
||||
assert!(off > -(1 << 20));
|
||||
assert!(off < (1 << 20));
|
||||
sink.put4(enc_adr(off, rd));
|
||||
|
@ -1358,20 +1235,26 @@ impl MachInstEmit for Inst {
|
|||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
ref info,
|
||||
ref targets,
|
||||
..
|
||||
} => {
|
||||
// This sequence is *one* instruction in the vcode, and is expanded only here at
|
||||
// emission time, because we cannot allow the regalloc to insert spills/reloads in
|
||||
// the middle; we depend on hardcoded PC-rel addressing below.
|
||||
//
|
||||
// N.B.: if PC-rel addressing on ADR below is changed, also update
|
||||
// `Inst::with_block_offsets()` in aarch64/inst/mod.rs.
|
||||
|
||||
// Save index in a tmp (the live range of ridx only goes to start of this
|
||||
// sequence; rtmp1 or rtmp2 may overwrite it).
|
||||
let inst = Inst::gen_move(rtmp2, ridx, I64);
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Load address of jump table
|
||||
let inst = Inst::Adr { rd: rtmp1, off: 16 };
|
||||
inst.emit(sink, flags, state);
|
||||
let inst = Inst::Adr {
|
||||
rd: rtmp1,
|
||||
label: MemLabel::PCRel(16),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
// Load value out of jump table
|
||||
let inst = Inst::SLoad32 {
|
||||
rd: rtmp2,
|
||||
|
@ -1383,7 +1266,7 @@ impl MachInstEmit for Inst {
|
|||
),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Add base of jump table to jump-table-sourced block offset
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op: ALUOp::Add64,
|
||||
|
@ -1391,30 +1274,22 @@ impl MachInstEmit for Inst {
|
|||
rn: rtmp1.to_reg(),
|
||||
rm: rtmp2.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Branch to computed address. (`targets` here is only used for successor queries
|
||||
// and is not needed for emission.)
|
||||
let inst = Inst::IndirectBr {
|
||||
rn: rtmp1.to_reg(),
|
||||
targets: vec![],
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
// Emit jump table (table of 32-bit offsets).
|
||||
let jt_off = sink.cur_offset();
|
||||
for &target in info.targets.iter() {
|
||||
let word_off = sink.cur_offset();
|
||||
let off_into_table = word_off - jt_off;
|
||||
sink.use_label_at_offset(
|
||||
word_off,
|
||||
target.as_label().unwrap(),
|
||||
LabelUse::PCRel32,
|
||||
);
|
||||
sink.put4(off_into_table);
|
||||
for target in targets {
|
||||
let off = target.as_offset_words() * 4;
|
||||
let off = i32::try_from(off).unwrap();
|
||||
// cast i32 to u32 (two's-complement)
|
||||
let off = off as u32;
|
||||
sink.put4(off);
|
||||
}
|
||||
|
||||
// Lowering produces an EmitIsland before using a JTSequence, so we can safely
|
||||
// disable the worst-case-size check in this case.
|
||||
start_off = sink.cur_offset();
|
||||
}
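// A rough model (plain integers, no labels or relocations) of the dispatch
// that the JTSequence above implements: load a 32-bit entry out of the table,
// add it to the base register holding the table address, and branch there
// indirectly. Exactly what the stored entries are relative to is handled by
// the label/relocation machinery and is glossed over here.
fn jump_table_target(table_base: u64, table: &[i32], index: usize) -> u64 {
    // Out-of-range indices are assumed to have been bounds-checked and
    // branched away before this sequence is reached.
    let entry = i64::from(table[index]);
    table_base.wrapping_add(entry as u64)
}

fn main() {
    // Table at address 0x1000 with three target offsets relative to the base.
    let targets = [0x40, 0x80, 0x100];
    assert_eq!(jump_table_target(0x1000, &targets, 1), 0x1080);
}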
|
||||
&Inst::LoadConst64 { rd, const_data } => {
|
||||
let inst = Inst::ULoad64 {
|
||||
|
@ -1422,11 +1297,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.put8(const_data);
|
||||
}
|
||||
&Inst::LoadExtName {
|
||||
|
@ -1440,11 +1315,11 @@ impl MachInstEmit for Inst {
|
|||
mem: MemArg::Label(MemLabel::PCRel(8)),
|
||||
srcloc: None, // can't cause a user trap.
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
let inst = Inst::Jump {
|
||||
dest: BranchTarget::ResolvedOffset(12),
|
||||
};
|
||||
inst.emit(sink, flags, state);
|
||||
inst.emit(sink, flags);
|
||||
sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
|
||||
if flags.emit_all_ones_funcaddrs() {
|
||||
sink.put8(u64::max_value());
|
||||
|
@ -1452,82 +1327,53 @@ impl MachInstEmit for Inst {
|
|||
sink.put8(0);
|
||||
}
|
||||
}
|
||||
&Inst::LoadAddr { rd, ref mem } => {
|
||||
let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
|
||||
for inst in mem_insts.into_iter() {
|
||||
inst.emit(sink, flags, state);
|
||||
}
|
||||
|
||||
let (reg, offset) = match mem {
|
||||
MemArg::Unscaled(r, simm9) => (r, simm9.value()),
|
||||
MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
|
||||
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
|
||||
};
|
||||
let abs_offset = if offset < 0 {
|
||||
-offset as u64
|
||||
} else {
|
||||
offset as u64
|
||||
};
|
||||
let alu_op = if offset < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
|
||||
if offset == 0 {
|
||||
let mov = Inst::mov(rd, reg);
|
||||
mov.emit(sink, flags, state);
|
||||
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
|
||||
let add = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
imm12,
|
||||
&Inst::LoadAddr { rd, ref mem } => match *mem {
|
||||
MemArg::FPOffset(fp_off) => {
|
||||
let alu_op = if fp_off < 0 {
|
||||
ALUOp::Sub64
|
||||
} else {
|
||||
ALUOp::Add64
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
} else {
|
||||
// Use `tmp2` here: `reg` may be `spilltmp` if the `MemArg` on this instruction
|
||||
// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
|
||||
// that no other instructions will be inserted here (we're emitting directly),
|
||||
// and a live range of `tmp2` should not span this instruction, so this use
|
||||
// should otherwise be correct.
|
||||
debug_assert!(rd.to_reg() != tmp2_reg());
|
||||
debug_assert!(reg != tmp2_reg());
|
||||
let tmp = writable_tmp2_reg();
|
||||
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
|
||||
insn.emit(sink, flags, state);
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap())
|
||||
{
|
||||
let inst = Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
rd,
|
||||
imm12,
|
||||
rn: fp_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
} else {
|
||||
let const_insts =
|
||||
Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap());
|
||||
for inst in const_insts {
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
let inst = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: fp_reg(),
|
||||
rm: rd.to_reg(),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
let add = Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn: reg,
|
||||
rm: tmp.to_reg(),
|
||||
};
|
||||
add.emit(sink, flags, state);
|
||||
}
|
||||
_ => unimplemented!("{:?}", mem),
|
||||
},
|
||||
&Inst::GetPinnedReg { rd } => {
|
||||
let inst = Inst::Mov {
|
||||
rd,
|
||||
rm: xreg(PINNED_REG),
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
&Inst::VirtualSPOffsetAdj { offset } => {
|
||||
debug!(
|
||||
"virtual sp offset adjusted by {} -> {}",
|
||||
offset,
|
||||
state.virtual_sp_offset + offset
|
||||
);
|
||||
state.virtual_sp_offset += offset;
|
||||
}
|
||||
&Inst::EmitIsland { needed_space } => {
|
||||
if sink.island_needed(needed_space + 4) {
|
||||
let jump_around_label = sink.get_label();
|
||||
let jmp = Inst::Jump {
|
||||
dest: BranchTarget::Label(jump_around_label),
|
||||
};
|
||||
jmp.emit(sink, flags, state);
|
||||
sink.emit_island();
|
||||
sink.bind_label(jump_around_label);
|
||||
}
|
||||
&Inst::SetPinnedReg { rm } => {
|
||||
let inst = Inst::Mov {
|
||||
rd: Writable::from_reg(xreg(PINNED_REG)),
|
||||
rm,
|
||||
};
|
||||
inst.emit(sink, flags);
|
||||
}
|
||||
}
|
||||
|
||||
let end_off = sink.cur_offset();
|
||||
debug_assert!((end_off - start_off) <= Inst::worst_case_size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ use crate::isa::aarch64::inst::*;
|
|||
use crate::isa::test_utils;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
#[test]
|
||||
|
@ -1311,68 +1310,38 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(32768, I8),
|
||||
mem: MemArg::FPOffset(32768),
|
||||
srcloc: None,
|
||||
},
|
||||
"100090D2B063308B010240F9",
|
||||
"movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"0F0090D2EF011D8BE10140F9",
|
||||
"movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(-32768, I8),
|
||||
mem: MemArg::FPOffset(-32768),
|
||||
srcloc: None,
|
||||
},
|
||||
"F0FF8F92B063308B010240F9",
|
||||
"movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"EFFF8F92EF011D8BE10140F9",
|
||||
"movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576, I8), // 2^20
|
||||
mem: MemArg::FPOffset(1048576), // 2^20
|
||||
srcloc: None,
|
||||
},
|
||||
"1002A0D2B063308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
"0F02A0D2EF011D8BE10140F9",
|
||||
"movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1
|
||||
mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
|
||||
srcloc: None,
|
||||
},
|
||||
"300080D21002A0F2B063308B010240F9",
|
||||
"movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 8, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E18040F8",
|
||||
"ldur x1, [x7, #8]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1024, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"E10042F9",
|
||||
"ldr x1, [x7, #1024]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::ULoad64 {
|
||||
rd: writable_xreg(1),
|
||||
mem: MemArg::RegOffset(xreg(7), 1048576, I64),
|
||||
srcloc: None,
|
||||
},
|
||||
"1002A0D2F060308B010240F9",
|
||||
"movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
|
||||
"2F0080D20F02A0F2EF011D8BE10140F9",
|
||||
"movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
|
@ -1832,7 +1801,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
|
@ -1843,7 +1811,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
|
@ -1854,7 +1821,6 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
|
@ -1865,83 +1831,10 @@ fn test_aarch64_binemit() {
|
|||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmeq,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E38E386E",
|
||||
"cmeq v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmgt,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
},
|
||||
"E336384E",
|
||||
"cmgt v3.16b, v23.16b, v24.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmge,
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I8X16,
|
||||
},
|
||||
"373D2C4E",
|
||||
"cmge v23.16b, v9.16b, v12.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhi,
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2534216E",
|
||||
"cmhi v5.16b, v1.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Cmhs,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I8X16,
|
||||
},
|
||||
"483C2F6E",
|
||||
"cmhs v8.16b, v2.16b, v15.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::Extend {
|
||||
rd: writable_xreg(1),
|
||||
|
@ -2062,7 +1955,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Zero(xreg(8)),
|
||||
},
|
||||
|
@ -2070,7 +1963,7 @@ fn test_aarch64_binemit() {
|
|||
"cbz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::NotZero(xreg(8)),
|
||||
},
|
||||
|
@ -2078,7 +1971,7 @@ fn test_aarch64_binemit() {
|
|||
"cbnz x8, 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Eq),
|
||||
},
|
||||
|
@ -2086,7 +1979,7 @@ fn test_aarch64_binemit() {
|
|||
"b.eq 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ne),
|
||||
},
|
||||
|
@ -2095,7 +1988,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hs),
|
||||
},
|
||||
|
@ -2103,7 +1996,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lo),
|
||||
},
|
||||
|
@ -2111,7 +2004,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lo 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Mi),
|
||||
},
|
||||
|
@ -2119,7 +2012,7 @@ fn test_aarch64_binemit() {
|
|||
"b.mi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Pl),
|
||||
},
|
||||
|
@ -2127,7 +2020,7 @@ fn test_aarch64_binemit() {
|
|||
"b.pl 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vs),
|
||||
},
|
||||
|
@ -2135,7 +2028,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vs 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
},
|
||||
|
@ -2143,7 +2036,7 @@ fn test_aarch64_binemit() {
|
|||
"b.vc 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Hi),
|
||||
},
|
||||
|
@ -2151,7 +2044,7 @@ fn test_aarch64_binemit() {
|
|||
"b.hi 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ls),
|
||||
},
|
||||
|
@ -2159,7 +2052,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ls 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Ge),
|
||||
},
|
||||
|
@ -2167,7 +2060,7 @@ fn test_aarch64_binemit() {
|
|||
"b.ge 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Lt),
|
||||
},
|
||||
|
@ -2175,7 +2068,7 @@ fn test_aarch64_binemit() {
|
|||
"b.lt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Gt),
|
||||
},
|
||||
|
@ -2183,7 +2076,7 @@ fn test_aarch64_binemit() {
|
|||
"b.gt 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
},
|
||||
|
@ -2191,7 +2084,7 @@ fn test_aarch64_binemit() {
|
|||
"b.le 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Al),
|
||||
},
|
||||
|
@ -2199,7 +2092,7 @@ fn test_aarch64_binemit() {
|
|||
"b.al 64",
|
||||
));
|
||||
insns.push((
|
||||
Inst::OneWayCondBr {
|
||||
Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(64),
|
||||
kind: CondBrKind::Cond(Cond::Nv),
|
||||
},
|
||||
|
@ -2208,7 +2101,7 @@ fn test_aarch64_binemit() {
|
|||
));
|
||||
|
||||
insns.push((
|
||||
Inst::CondBr {
|
||||
Inst::CondBrLoweredCompound {
|
||||
taken: BranchTarget::ResolvedOffset(64),
|
||||
not_taken: BranchTarget::ResolvedOffset(128),
|
||||
kind: CondBrKind::Cond(Cond::Le),
|
||||
|
@ -2219,13 +2112,11 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::Call {
|
||||
info: Box::new(CallInfo {
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
}),
|
||||
dest: ExternalName::testcase("test0"),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::Call,
|
||||
},
|
||||
"00000094",
|
||||
"bl 0",
|
||||
|
@ -2233,13 +2124,11 @@ fn test_aarch64_binemit() {
|
|||
|
||||
insns.push((
|
||||
Inst::CallInd {
|
||||
info: Box::new(CallIndInfo {
|
||||
rn: xreg(10),
|
||||
uses: Vec::new(),
|
||||
defs: Vec::new(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
}),
|
||||
rn: xreg(10),
|
||||
uses: Set::empty(),
|
||||
defs: Set::empty(),
|
||||
loc: SourceLoc::default(),
|
||||
opcode: Opcode::CallIndirect,
|
||||
},
|
||||
"40013FD6",
|
||||
"blr x10",
|
||||
|
@ -2248,7 +2137,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::IndirectBr {
|
||||
rn: xreg(3),
|
||||
targets: vec![],
|
||||
targets: vec![1, 2, 3],
|
||||
},
|
||||
"60001FD6",
|
||||
"br x3",
|
||||
|
@ -2259,7 +2148,7 @@ fn test_aarch64_binemit() {
|
|||
insns.push((
|
||||
Inst::Adr {
|
||||
rd: writable_xreg(15),
|
||||
off: (1 << 20) - 4,
|
||||
label: MemLabel::PCRel((1 << 20) - 4),
|
||||
},
|
||||
"EFFF7F10",
|
||||
"adr x15, pc+1048572",
|
||||
|
@ -2274,15 +2163,6 @@ fn test_aarch64_binemit() {
|
|||
"mov v8.8b, v4.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuMove128 {
|
||||
rd: writable_vreg(17),
|
||||
rn: vreg(26),
|
||||
},
|
||||
"511FBA4E",
|
||||
"mov v17.16b, v26.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRR {
|
||||
fpu_op: FPUOp1::Abs32,
|
||||
|
@ -2519,46 +2399,6 @@ fn test_aarch64_binemit() {
|
|||
"fmadd d15, d30, d31, d1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204202F",
|
||||
"ushr v2.2s, v5.2s, #32",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(5),
|
||||
},
|
||||
"A204417F",
|
||||
"ushr d2, d5, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44553F2F",
|
||||
"sli v4.2s, v10.2s, #31",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRI {
|
||||
fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(10),
|
||||
},
|
||||
"44557F7F",
|
||||
"sli d4, d10, #63",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuToInt {
|
||||
op: FpuToIntOp::F32ToU32,
|
||||
|
@ -2845,15 +2685,6 @@ fn test_aarch64_binemit() {
|
|||
"ldr d16, pc+8 ; b 12 ; data.f64 1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst128 {
|
||||
rd: writable_vreg(5),
|
||||
const_data: 0x0f0e0d0c0b0a09080706050403020100,
|
||||
},
|
||||
"4500009C05000014000102030405060708090A0B0C0D0E0F",
|
||||
"ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuCSel32 {
|
||||
rd: writable_vreg(1),
|
||||
|
@ -2960,11 +2791,19 @@ fn test_aarch64_binemit() {
|
|||
let actual_printing = insn.show_rru(Some(&rru));
|
||||
assert_eq!(expected_printing, actual_printing);
|
||||
|
||||
// Check the encoding is as expected.
|
||||
let text_size = {
|
||||
let mut code_sec = MachSectionSize::new(0);
|
||||
insn.emit(&mut code_sec, &flags);
|
||||
code_sec.size()
|
||||
};
|
||||
|
||||
let mut sink = test_utils::TestCodeSink::new();
|
||||
let mut buffer = MachBuffer::new();
|
||||
insn.emit(&mut buffer, &flags, &mut Default::default());
|
||||
let buffer = buffer.finish();
|
||||
buffer.emit(&mut sink);
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, text_size);
|
||||
let code_sec = sections.get_section(code_idx);
|
||||
insn.emit(code_sec, &flags);
|
||||
sections.emit(&mut sink);
|
||||
let actual_encoding = &sink.stringify();
|
||||
assert_eq!(expected_encoding, actual_encoding);
|
||||
}
|
||||
|
|
|
@ -106,85 +106,6 @@ impl SImm7Scaled {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct FPULeftShiftImm {
|
||||
pub amount: u8,
|
||||
pub lane_size_in_bits: u8,
|
||||
}
|
||||
|
||||
impl FPULeftShiftImm {
|
||||
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||
if amount < lane_size_in_bits {
|
||||
Some(Self {
|
||||
amount,
|
||||
lane_size_in_bits,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enc(&self) -> u32 {
|
||||
debug_assert!(self.lane_size_in_bits.is_power_of_two());
|
||||
debug_assert!(self.lane_size_in_bits > self.amount);
|
||||
// The encoding of the immediate follows the table below,
|
||||
// where xs encode the shift amount.
|
||||
//
|
||||
// | lane_size_in_bits | encoding |
|
||||
// +------------------------------+
|
||||
// | 8 | 0001xxx |
|
||||
// | 16 | 001xxxx |
|
||||
// | 32 | 01xxxxx |
|
||||
// | 64 | 1xxxxxx |
|
||||
//
|
||||
// The highest one bit is represented by `lane_size_in_bits`. Since
|
||||
// `lane_size_in_bits` is a power of 2 and `amount` is less
|
||||
// than `lane_size_in_bits`, they can be ORed
|
||||
// together to produce the encoded value.
|
||||
u32::from(self.lane_size_in_bits | self.amount)
|
||||
}
|
||||
}
|
||||
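A quick standalone check of the OR-based encoding described in the comments above (a sketch under the same invariants; enc_left_shift is a hypothetical free function, not part of this patch):

fn enc_left_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    // Same invariants as FPULeftShiftImm: lane size is 32 or 64 and the
    // amount is strictly smaller than the lane size.
    assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
    assert!(amount < lane_size_in_bits);
    // The lane-size marker bit and the shift amount occupy disjoint bits,
    // so ORing them yields the encoded immediate.
    u32::from(lane_size_in_bits | amount)
}

fn main() {
    // 32-bit lane, shift by 31: 0b010_0000 | 0b001_1111 = 0b011_1111 = 63.
    assert_eq!(enc_left_shift(32, 31), 63);
    // 64-bit lane, shift by 1: 0b100_0000 | 0b000_0001 = 65.
    assert_eq!(enc_left_shift(64, 1), 65);
}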
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct FPURightShiftImm {
|
||||
pub amount: u8,
|
||||
pub lane_size_in_bits: u8,
|
||||
}
|
||||
|
||||
impl FPURightShiftImm {
|
||||
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||
if amount > 0 && amount <= lane_size_in_bits {
|
||||
Some(Self {
|
||||
amount,
|
||||
lane_size_in_bits,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enc(&self) -> u32 {
|
||||
debug_assert_ne!(0, self.amount);
|
||||
// The encoding of the immediate follows the table below,
|
||||
// where xs encode the negated shift amount.
|
||||
//
|
||||
// | lane_size_in_bits | encoding |
|
||||
// +------------------------------+
|
||||
// | 8 | 0001xxx |
|
||||
// | 16 | 001xxxx |
|
||||
// | 32 | 01xxxxx |
|
||||
// | 64 | 1xxxxxx |
|
||||
//
|
||||
// The shift amount is negated such that a shift amount
|
||||
// of 1 (in 64-bit) is encoded as 0b111111 and a shift
|
||||
// amount of 64 is encoded as 0b000000,
|
||||
// in the bottom 6 bits.
|
||||
u32::from((self.lane_size_in_bits * 2) - self.amount)
|
||||
}
|
||||
}
|
||||
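The same kind of check for the negated encoding used by the right-shift immediate above (a standalone sketch under the stated invariants; enc_right_shift is hypothetical):

fn enc_right_shift(lane_size_in_bits: u8, amount: u8) -> u32 {
    // Same invariants as FPURightShiftImm: lane size is 32 or 64 and
    // 0 < amount <= lane size.
    assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
    assert!(amount > 0 && amount <= lane_size_in_bits);
    // Subtracting from twice the lane size keeps the lane-size marker bit
    // set while storing the negated amount in the low bits.
    u32::from(lane_size_in_bits * 2 - amount)
}

fn main() {
    // 64-bit lane, shift by 1: 128 - 1 = 127, i.e. the marker bit plus 0b111111.
    assert_eq!(enc_right_shift(64, 1), 127);
    // 64-bit lane, shift by 64: 128 - 64 = 64, i.e. the marker bit plus 0b000000.
    assert_eq!(enc_right_shift(64, 64), 64);
}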
|
||||
/// A 9-bit signed offset.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct SImm9 {
|
||||
|
@ -213,11 +134,6 @@ impl SImm9 {
|
|||
pub fn bits(&self) -> u32 {
|
||||
(self.value as u32) & 0x1ff
|
||||
}
|
||||
|
||||
/// Signed value of immediate.
|
||||
pub fn value(&self) -> i32 {
|
||||
self.value as i32
|
||||
}
|
||||
}
|
||||
|
||||
/// An unsigned, scaled 12-bit offset.
|
||||
|
@ -256,16 +172,6 @@ impl UImm12Scaled {
|
|||
pub fn bits(&self) -> u32 {
|
||||
(self.value as u32 / self.scale_ty.bytes()) & 0xfff
|
||||
}
|
||||
|
||||
/// Value after scaling.
|
||||
pub fn value(&self) -> u32 {
|
||||
self.value as u32
|
||||
}
|
||||
|
||||
/// The value type which is the scaling base.
|
||||
pub fn scale_ty(&self) -> Type {
|
||||
self.scale_ty
|
||||
}
|
||||
}
|
||||
|
||||
/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
|
||||
|
@ -660,18 +566,6 @@ impl ShowWithRRU for SImm7Scaled {
|
|||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for FPULeftShiftImm {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.amount)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for FPURightShiftImm {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.amount)
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for SImm9 {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("#{}", self.value)
|
||||
|
|
File diff suppressed because it is too large
|
@ -1,6 +1,5 @@
|
|||
//! AArch64 ISA definitions: registers.
|
||||
|
||||
use crate::ir::types::*;
|
||||
use crate::isa::aarch64::inst::InstSize;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
@ -21,21 +20,23 @@ pub const PINNED_REG: u8 = 21;
|
|||
const XREG_INDICES: [u8; 31] = [
|
||||
// X0 - X7
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
// X8 - X15
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
// X8 - X14
|
||||
40, 41, 42, 43, 44, 45, 46,
|
||||
// X15
|
||||
59,
|
||||
// X16, X17
|
||||
58, 59,
|
||||
47, 48,
|
||||
// X18
|
||||
60,
|
||||
// X19, X20
|
||||
48, 49,
|
||||
49, 50,
|
||||
// X21, put aside because it's the pinned register.
|
||||
57,
|
||||
58,
|
||||
// X22 - X28
|
||||
50, 51, 52, 53, 54, 55, 56,
|
||||
// X29 (FP)
|
||||
51, 52, 53, 54, 55, 56, 57,
|
||||
// X29
|
||||
61,
|
||||
// X30 (LR)
|
||||
// X30
|
||||
62,
|
||||
];
|
||||
|
||||
|
@ -124,17 +125,14 @@ pub fn writable_fp_reg() -> Writable<Reg> {
|
|||
Writable::from_reg(fp_reg())
|
||||
}
|
||||
|
||||
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
|
||||
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
|
||||
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
|
||||
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
|
||||
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
|
||||
///
|
||||
/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
|
||||
/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
|
||||
/// to live through call instructions.
|
||||
/// Get a reference to the "spill temp" register. This register is used to
|
||||
/// compute the address of a spill slot when a direct offset addressing mode from
|
||||
/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
|
||||
/// and reserve it for this purpose for simplicity; otherwise we need a
|
||||
/// multi-stage analysis where we first determine how many spill slots we have,
|
||||
/// then perhaps remove the reg from the pool and recompute regalloc.
|
||||
pub fn spilltmp_reg() -> Reg {
|
||||
xreg(16)
|
||||
xreg(15)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the spilltmp reg.
|
||||
|
@ -142,20 +140,6 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> {
|
|||
Writable::from_reg(spilltmp_reg())
|
||||
}
|
||||
|
||||
/// Get a reference to the second temp register. We need this in some edge cases
|
||||
/// where we need both the spilltmp and another temporary.
|
||||
///
|
||||
/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
|
||||
/// free to use otherwise.
|
||||
pub fn tmp2_reg() -> Reg {
|
||||
xreg(17)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the tmp2 reg.
|
||||
pub fn writable_tmp2_reg() -> Writable<Reg> {
|
||||
Writable::from_reg(tmp2_reg())
|
||||
}
|
||||
|
||||
/// Create the register universe for AArch64.
|
||||
pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
||||
let mut regs = vec![];
|
||||
|
@ -189,7 +173,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
|
||||
for i in 0u8..32u8 {
|
||||
// See above for excluded registers.
|
||||
if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
|
||||
if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
|
||||
continue;
|
||||
}
|
||||
let reg = Reg::new_real(
|
||||
|
@ -207,7 +191,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: x_reg_base as usize,
|
||||
last: x_reg_last as usize,
|
||||
suggested_scratch: Some(XREG_INDICES[19] as usize),
|
||||
suggested_scratch: Some(XREG_INDICES[13] as usize),
|
||||
});
|
||||
allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: v_reg_base as usize,
|
||||
|
@ -227,8 +211,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
|
|||
regs.len()
|
||||
};
|
||||
|
||||
regs.push((xreg(16).to_real_reg(), "x16".to_string()));
|
||||
regs.push((xreg(17).to_real_reg(), "x17".to_string()));
|
||||
regs.push((xreg(15).to_real_reg(), "x15".to_string()));
|
||||
regs.push((xreg(18).to_real_reg(), "x18".to_string()));
|
||||
regs.push((fp_reg().to_real_reg(), "fp".to_string()));
|
||||
regs.push((link_reg().to_real_reg(), "lr".to_string()));
|
||||
|
@ -276,17 +259,13 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
|
|||
s
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
/// Show a vector register when its use as a 32-bit or 64-bit float is known.
|
||||
pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSize) -> String {
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
if reg.get_class() != RegClass::V128 {
|
||||
return s;
|
||||
}
|
||||
let prefix = match size {
|
||||
InstSize::Size32 => "s",
|
||||
InstSize::Size64 => "d",
|
||||
InstSize::Size128 => "q",
|
||||
};
|
||||
let prefix = if size.is32() { "s" } else { "d" };
|
||||
s.replace_range(0..1, prefix);
|
||||
s
|
||||
}
|
||||
|
@ -312,17 +291,3 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
|
|||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
match ty {
|
||||
I8X16 => s.push_str(".16b"),
|
||||
F32X2 => s.push_str(".2s"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
s
|
||||
}
|
||||
|
|
|
@ -14,14 +14,12 @@ use crate::ir::Inst as IRInst;
|
|||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
|
||||
use super::lower_inst;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
//============================================================================
|
||||
|
@ -106,18 +104,11 @@ pub(crate) enum ResultRegImmShift {
|
|||
}
|
||||
|
||||
//============================================================================
|
||||
// Instruction input "slots".
|
||||
// Instruction input and output "slots".
|
||||
//
|
||||
// We use these types to refer to operand numbers, and result numbers, together
|
||||
// with the associated instruction, in a type-safe way.
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnInput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) input: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular output of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnOutput {
|
||||
|
@ -125,48 +116,102 @@ pub(crate) struct InsnOutput {
|
|||
pub(crate) output: usize,
|
||||
}
|
||||
|
||||
/// Identifier for a particular input of an instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct InsnInput {
|
||||
pub(crate) insn: IRInst,
|
||||
pub(crate) input: usize,
|
||||
}
|
||||
|
||||
/// Producer of a value: either a previous instruction's output, or a register that will be
|
||||
/// codegen'd separately.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum InsnInputSource {
|
||||
Output(InsnOutput),
|
||||
Reg(Reg),
|
||||
}
|
||||
|
||||
impl InsnInputSource {
|
||||
fn as_output(self) -> Option<InsnOutput> {
|
||||
match self {
|
||||
InsnInputSource::Output(o) => Some(o),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_input<C: LowerCtx<I = Inst>>(ctx: &mut C, output: InsnOutput, num: usize) -> InsnInput {
|
||||
assert!(num <= ctx.num_inputs(output.insn));
|
||||
InsnInput {
|
||||
insn: output.insn,
|
||||
input: num,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an instruction input to a producing instruction's output if possible (in same BB), or a
|
||||
/// register otherwise.
|
||||
fn input_source<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> InsnInputSource {
|
||||
if let Some((input_inst, result_num)) = ctx.input_inst(input.insn, input.input) {
|
||||
let out = InsnOutput {
|
||||
insn: input_inst,
|
||||
output: result_num,
|
||||
};
|
||||
InsnInputSource::Output(out)
|
||||
} else {
|
||||
let reg = ctx.input(input.insn, input.input);
|
||||
InsnInputSource::Reg(reg)
|
||||
}
|
||||
}
|
||||
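A minimal usage sketch of input_source (assumed to sit alongside the helpers above; input_is_iconst is a hypothetical helper, not part of this patch): check whether an input is produced by an iconst in the same block, falling back to false when only a register is available.

fn input_is_iconst<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> bool {
    match input_source(ctx, input) {
        // The producing instruction is visible: inspect its opcode.
        InsnInputSource::Output(out) => ctx.data(out.insn).opcode() == Opcode::Iconst,
        // Only a register is available, so nothing can be pattern-matched.
        InsnInputSource::Reg(_) => false,
    }
}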
|
||||
//============================================================================
|
||||
// Lowering: convert instruction inputs to forms that we can use.
|
||||
// Lowering: convert instruction outputs to result types.
|
||||
|
||||
/// Lower an instruction input to a 64-bit constant, if possible.
|
||||
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
|
||||
let input = ctx.get_input(input.insn, input.input);
|
||||
input.constant
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a constant register-shift amount, if possible.
|
||||
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<u128> {
|
||||
/// Lower an instruction output to a 64-bit constant, if possible.
|
||||
pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
|
||||
if out.output > 0 {
|
||||
None
|
||||
} else {
|
||||
let inst_data = ctx.data(out.insn);
|
||||
|
||||
match inst_data {
|
||||
&InstructionData::UnaryConst {
|
||||
opcode: _,
|
||||
constant_handle,
|
||||
} => {
|
||||
let mut bytes = [0u8; 16];
|
||||
let c = ctx.get_constant_data(constant_handle).clone().into_vec();
|
||||
assert_eq!(c.len(), 16);
|
||||
bytes.copy_from_slice(&c);
|
||||
Some(u128::from_le_bytes(bytes))
|
||||
if inst_data.opcode() == Opcode::Null {
|
||||
Some(0)
|
||||
} else {
|
||||
match inst_data {
|
||||
&InstructionData::UnaryImm { opcode: _, imm } => {
|
||||
// Only has Into for i64; we use u64 elsewhere, so we cast.
|
||||
let imm: i64 = imm.into();
|
||||
Some(imm as u64)
|
||||
}
|
||||
&InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
|
||||
&InstructionData::UnaryIeee32 { opcode: _, imm } => Some(u64::from(imm.bits())),
|
||||
&InstructionData::UnaryIeee64 { opcode: _, imm } => Some(imm.bits()),
|
||||
_ => None,
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f32<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f32> {
|
||||
output_to_const(ctx, out).map(|value| f32::from_bits(value as u32))
|
||||
}
|
||||
|
||||
pub(crate) fn output_to_const_f64<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<f64> {
|
||||
output_to_const(ctx, out).map(|value| f64::from_bits(value))
|
||||
}
|
||||
|
||||
/// Lower an instruction output to a constant register-shift amount, if possible.
|
||||
pub(crate) fn output_to_shiftimm<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
out: InsnOutput,
|
||||
) -> Option<ShiftOpShiftImm> {
|
||||
output_to_const(ctx, out).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||
}
|
||||
|
||||
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
|
||||
/// parameter to `input_to_*` below.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
|
@ -192,9 +237,9 @@ impl NarrowValueMode {
|
|||
}
|
||||
}
|
||||
|
||||
/// Allocate a register for an instruction output and return it.
|
||||
/// Lower an instruction output to a reg.
|
||||
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
|
||||
ctx.get_output(out.insn, out.output)
|
||||
ctx.output(out.insn, out.output)
|
||||
}
|
||||
|
||||
/// Lower an instruction input to a reg.
|
||||
|
@ -207,31 +252,13 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> Reg {
|
||||
debug!("input_to_reg: input {:?}", input);
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let from_bits = ty_bits(ty) as u8;
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
let in_reg = if let Some(c) = inputs.constant {
|
||||
let masked = if from_bits < 64 {
|
||||
c & ((1u64 << from_bits) - 1)
|
||||
} else {
|
||||
c
|
||||
};
|
||||
// Generate constants fresh at each use to minimize long-range register pressure.
|
||||
let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
|
||||
for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
to_reg.to_reg()
|
||||
} else {
|
||||
ctx.use_input_reg(inputs);
|
||||
inputs.reg
|
||||
};
|
||||
|
||||
let in_reg = ctx.input(input.insn, input.input);
|
||||
match (narrow_mode, from_bits) {
|
||||
(NarrowValueMode::None, _) => in_reg,
|
||||
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -242,7 +269,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(NarrowValueMode::SignExtend32, n) if n < 32 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -255,23 +282,18 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
|
||||
|
||||
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
|
||||
if inputs.constant.is_some() {
|
||||
// Constants are zero-extended to full 64-bit width on load already.
|
||||
in_reg
|
||||
} else {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
}
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
signed: false,
|
||||
from_bits,
|
||||
to_bits: 64,
|
||||
});
|
||||
tmp.to_reg()
|
||||
}
|
||||
(NarrowValueMode::SignExtend64, n) if n < 64 => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::Extend {
|
||||
rd: tmp,
|
||||
rn: in_reg,
|
||||
|
@ -282,7 +304,6 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
tmp.to_reg()
|
||||
}
|
||||
(_, 64) => in_reg,
|
||||
(_, 128) => in_reg,
|
||||
|
||||
_ => panic!(
|
||||
"Unsupported input width: input ty {} bits {} mode {:?}",
|
||||
|
@ -292,6 +313,8 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
|
||||
/// This does not actually codegen the source instruction; it just uses the
|
||||
/// vreg into which the source instruction will generate its value.
|
||||
///
|
||||
/// The `narrow_mode` flag indicates whether the consumer of this value needs
|
||||
/// the high bits clear. For many operations, such as an add/sub/mul or any
|
||||
|
@ -307,18 +330,23 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRS {
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let op = ctx.data(insn).opcode();
|
||||
|
||||
if op == Opcode::Ishl {
|
||||
let shiftee = InsnInput { insn, input: 0 };
|
||||
let shift_amt = InsnInput { insn, input: 1 };
|
||||
let shiftee = get_input(ctx, out, 0);
|
||||
let shift_amt = get_input(ctx, out, 1);
|
||||
|
||||
// Can we get the shift amount as an immediate?
|
||||
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
if let Some(shift_amt_out) = input_source(ctx, shift_amt).as_output() {
|
||||
if let Some(shiftimm) = output_to_shiftimm(ctx, shift_amt_out) {
|
||||
let reg = input_to_reg(ctx, shiftee, narrow_mode);
|
||||
ctx.merged(insn);
|
||||
ctx.merged(shift_amt_out.insn);
|
||||
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -336,10 +364,11 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSE {
|
||||
let inputs = ctx.get_input(input.insn, input.input);
|
||||
if let Some((insn, 0)) = inputs.inst {
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
let insn = out.insn;
|
||||
assert!(out.output <= ctx.num_outputs(insn));
|
||||
let op = ctx.data(insn).opcode();
|
||||
let out_ty = ctx.output_ty(insn, 0);
|
||||
let out_ty = ctx.output_ty(insn, out.output);
|
||||
let out_bits = ty_bits(out_ty);
|
||||
|
||||
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
|
||||
|
@ -349,7 +378,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
&& ((narrow_mode.is_32bit() && out_bits < 32)
|
||||
|| (!narrow_mode.is_32bit() && out_bits < 64))
|
||||
{
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let reg = output_to_reg(ctx, out);
|
||||
let extendop = match (narrow_mode, out_bits) {
|
||||
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
|
||||
ExtendOp::SXTB
|
||||
|
@ -373,14 +402,15 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
return ResultRSE::RegExtend(reg.to_reg(), extendop);
|
||||
}
|
||||
|
||||
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
|
||||
if op == Opcode::Uextend || op == Opcode::Sextend {
|
||||
assert!(out_bits == 32 || out_bits == 64);
|
||||
let sign_extend = op == Opcode::Sextend;
|
||||
let inner_ty = ctx.input_ty(insn, 0);
|
||||
let extendee = get_input(ctx, out, 0);
|
||||
let inner_ty = ctx.input_ty(extendee.insn, extendee.input);
|
||||
let inner_bits = ty_bits(inner_ty);
|
||||
assert!(inner_bits < out_bits);
|
||||
let extendop = match (sign_extend, inner_bits) {
|
||||
|
@ -394,7 +424,8 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
|
|||
(false, 32) => ExtendOp::UXTW,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
|
||||
let reg = input_to_reg(ctx, extendee, NarrowValueMode::None);
|
||||
ctx.merged(insn);
|
||||
return ResultRSE::RegExtend(reg, extendop);
|
||||
}
|
||||
}
|
||||
|
@ -407,9 +438,12 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSEImm12 {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSEImm12::Imm12(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,11 +455,14 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
|
|||
input: InsnInput,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> ResultRSImmLogic {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
let ty = ctx.input_ty(input.insn, input.input);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
let ty = ctx.output_ty(out.insn, out.output);
|
||||
let ty = if ty_bits(ty) < 32 { I32 } else { ty };
|
||||
if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRSImmLogic::ImmLogic(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -436,9 +473,12 @@ pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
|
|||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
) -> ResultRegImmShift {
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
if let InsnInputSource::Output(out) = input_source(ctx, input) {
|
||||
if let Some(imm_value) = output_to_const(ctx, out) {
|
||||
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
|
||||
ctx.merged(out.insn);
|
||||
return ResultRegImmShift::ImmShift(immshift);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -544,10 +584,12 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
|
||||
// mul instructions (Load/StoreComplex don't include scale factors).
|
||||
|
||||
// Handle one reg and offset.
|
||||
// Handle one reg and offset that fits in immediate, if possible.
|
||||
if addends.len() == 1 {
|
||||
let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
|
||||
return MemArg::RegOffset(reg, offset as i64, elem_ty);
|
||||
if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
|
||||
return memarg;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle two regs and a zero offset, if possible.
|
||||
|
@ -558,7 +600,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
// Otherwise, generate add instructions.
|
||||
let addr = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let addr = ctx.tmp(RegClass::I64, I64);
|
||||
|
||||
// Get the const into a reg.
|
||||
lower_constant_u64(ctx, addr.clone(), offset as u64);
|
||||
|
@ -570,7 +612,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
|
|||
// In an addition, the stack register is the zero register, so divert it to another
|
||||
// register just before doing the actual add.
|
||||
let reg = if reg == stack_reg() {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::Mov {
|
||||
rd: tmp,
|
||||
rm: stack_reg(),
|
||||
|
@ -617,14 +659,6 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::load_fp_constant64(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
rd: Writable<Reg>,
|
||||
value: u128,
|
||||
) {
|
||||
ctx.emit(Inst::load_fp_constant128(rd, value));
|
||||
}
|
||||
|
||||
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
|
||||
match cc {
|
||||
IntCC::Equal => Cond::Eq,
|
||||
|
@ -716,7 +750,6 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
B64 | I64 | F64 => 64,
|
||||
B128 | I128 => 128,
|
||||
IFLAGS | FFLAGS => 32,
|
||||
I8X16 | B8X16 => 128,
|
||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||
}
|
||||
}
|
||||
|
@ -724,7 +757,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||
match ty {
|
||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
||||
F32 | F64 | B128 | I128 | I8X16 => false,
|
||||
F32 | F64 | B128 | I128 => false,
|
||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||
}
|
||||
|
@ -790,29 +823,24 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input.
|
||||
/// Checks for an instance of `op` feeding the given input. Marks as merged (decrementing refcount) if so.
|
||||
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
|
||||
c: &mut C,
|
||||
input: InsnInput,
|
||||
op: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
debug!(
|
||||
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
|
||||
input, inputs, op
|
||||
);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
debug!(" -> input inst {:?}", data);
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
c.merged(out.insn);
|
||||
return Some(out.insn);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
|
||||
/// Bint or a bitcast).
|
||||
/// Bint or a bitcast). Marks one or both as merged if so, as appropriate.
|
||||
///
|
||||
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
|
||||
/// a bit more generic.
|
||||
|
@ -822,19 +850,21 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
|
|||
op: Opcode,
|
||||
conv: Opcode,
|
||||
) -> Option<IRInst> {
|
||||
let inputs = c.get_input(input.insn, input.input);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
if let Some(ret) = maybe_input_insn(c, input, op) {
|
||||
return Some(ret);
|
||||
}
|
||||
|
||||
if let InsnInputSource::Output(out) = input_source(c, input) {
|
||||
let data = c.data(out.insn);
|
||||
if data.opcode() == conv {
|
||||
let inputs = c.get_input(src_inst, 0);
|
||||
if let Some((src_inst, _)) = inputs.inst {
|
||||
let data = c.data(src_inst);
|
||||
if data.opcode() == op {
|
||||
return Some(src_inst);
|
||||
}
|
||||
let conv_insn = out.insn;
|
||||
let conv_input = InsnInput {
|
||||
insn: conv_insn,
|
||||
input: 0,
|
||||
};
|
||||
if let Some(inner) = maybe_input_insn(c, conv_input, op) {
|
||||
c.merged(conv_insn);
|
||||
return Some(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -846,7 +876,6 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
insn: IRInst,
|
||||
is_signed: bool,
|
||||
) {
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let bits = ty_bits(ty);
|
||||
let narrow_mode = match (bits <= 32, is_signed) {
|
||||
|
@ -868,7 +897,6 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||
let ty = ctx.input_ty(insn, 0);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rd = writable_zero_reg();
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
|
@ -906,21 +934,17 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
|
|||
impl LowerBackend for AArch64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst)
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) {
|
||||
lower_inst::lower_insn_to_regs(ctx, ir_inst);
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
targets: &[BlockIndex],
|
||||
fallthrough: Option<BlockIndex>,
|
||||
) {
|
||||
lower_inst::lower_branch(ctx, branches, targets, fallthrough)
|
||||
}
|
||||
|
||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||
Some(xreg(PINNED_REG))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
//! Lower a single Cranelift instruction into vcode.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::condcodes::FloatCC;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
||||
use crate::isa::aarch64::abi::*;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
|
||||
use regalloc::RegClass;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryFrom;
|
||||
use smallvec::SmallVec;
|
||||
|
@ -22,10 +19,7 @@ use smallvec::SmallVec;
|
|||
use super::lower::*;
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
insn: IRInst,
|
||||
) -> CodegenResult<()> {
|
||||
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
||||
let op = ctx.data(insn).opcode();
|
||||
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
|
||||
.map(|i| InsnInput { insn, input: i })
|
||||
|
@ -41,17 +35,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
match op {
|
||||
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
|
||||
let value = ctx.get_constant(insn).unwrap();
|
||||
let value = output_to_const(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_u64(ctx, rd, value);
|
||||
}
|
||||
Opcode::F32const => {
|
||||
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
|
||||
let value = output_to_const_f32(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f32(ctx, rd, value);
|
||||
}
|
||||
Opcode::F64const => {
|
||||
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
|
||||
let value = output_to_const_f64(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f64(ctx, rd, value);
|
||||
}
|
||||
|
@ -85,8 +79,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
VecALUOp::UQAddScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -97,7 +91,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -117,8 +110,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
VecALUOp::UQSubScalar
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let va = ctx.tmp(RegClass::V128, I128);
|
||||
let vb = ctx.tmp(RegClass::V128, I128);
|
||||
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
@ -129,7 +122,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec64 {
|
||||
rd,
|
||||
|
@ -279,7 +271,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -305,7 +297,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 20;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::Zero(rm),
|
||||
});
|
||||
|
@ -332,7 +324,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
nzcv: NZCV::new(false, false, false, false),
|
||||
cond: Cond::Eq,
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(12),
|
||||
kind: CondBrKind::Cond(Cond::Vc),
|
||||
});
|
||||
|
@ -345,7 +337,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
// Check for divide by 0.
|
||||
let branch_size = 8;
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(branch_size),
|
||||
kind: CondBrKind::NotZero(rm),
|
||||
});
|
||||
|
@ -501,7 +493,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, ty);
|
||||
let tmp = ctx.tmp(RegClass::I64, ty);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp,
|
||||
|
@ -524,7 +516,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// Really ty_bits_size - rn, but the upper bits of the result are
|
||||
// ignored (because of the implicit masking done by the instruction),
|
||||
// so this is equivalent to negating the input.
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp,
|
||||
|
@ -537,7 +529,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
};
|
||||
|
||||
// Explicitly mask the rotation count.
|
||||
let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp_masked_rm = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op: ALUOp::And32,
|
||||
rd: tmp_masked_rm,
|
||||
|
@ -546,8 +538,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
let tmp_masked_rm = tmp_masked_rm.to_reg();
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
alu_op: ALUOp::Sub32,
|
||||
rd: tmp1,
|
||||
|
@ -586,7 +578,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
immshift.imm &= ty_bits_size - 1;
|
||||
|
||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
ctx.emit(Inst::AluRRImmShift {
|
||||
alu_op: ALUOp::Lsr32,
|
||||
rd: tmp1,
|
||||
|
@ -691,7 +683,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// and fix the sequence below to work properly for this.
|
||||
let narrow_mode = NarrowValueMode::ZeroExtend64;
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
|
||||
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
|
||||
// the rest of the code is identical to the 64-bit version.
|
||||
|
@ -878,7 +870,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
||||
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
||||
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
||||
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in load"),
|
||||
});
|
||||
}
|
||||
|
@ -918,7 +909,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(32, true) => Inst::FpuStore32 { rd, mem, srcloc },
|
||||
(64, false) => Inst::Store64 { rd, mem, srcloc },
|
||||
(64, true) => Inst::FpuStore64 { rd, mem, srcloc },
|
||||
(128, _) => Inst::FpuStore128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in store"),
|
||||
});
|
||||
}
|
||||
|
@ -1002,7 +992,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
}
|
||||
|
||||
Opcode::Bitselect => {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
|
@ -1155,66 +1145,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
(false, true) => NarrowValueMode::SignExtend64,
|
||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||
};
|
||||
|
||||
if ty_bits(ty) < 128 {
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
} else {
|
||||
if ty != I8X16 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"unsupported simd type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
|
||||
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
// 'Not equal' is implemented with 'equal' and inverting
|
||||
// the result.
|
||||
let (alu_op, swap) = match cond {
|
||||
Cond::Eq => (VecALUOp::Cmeq, false),
|
||||
Cond::Ne => (VecALUOp::Cmeq, false),
|
||||
Cond::Ge => (VecALUOp::Cmge, false),
|
||||
Cond::Gt => (VecALUOp::Cmgt, false),
|
||||
Cond::Le => (VecALUOp::Cmge, true),
|
||||
Cond::Lt => (VecALUOp::Cmgt, true),
|
||||
Cond::Hs => (VecALUOp::Cmhs, false),
|
||||
Cond::Hi => (VecALUOp::Cmhi, false),
|
||||
Cond::Ls => (VecALUOp::Cmhs, true),
|
||||
Cond::Lo => (VecALUOp::Cmhi, true),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if swap {
|
||||
std::mem::swap(&mut rn, &mut rm);
|
||||
}
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
ty: I8X16,
|
||||
});
|
||||
}
|
||||
}
|
||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||
ctx.emit(Inst::CondSet { cond, rd });
|
||||
}
|
||||
|
||||
Opcode::Fcmp => {
|
||||
|
@ -1252,15 +1188,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Trapif | Opcode::Trapff => {
|
||||
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
|
||||
|
||||
let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
// The flags must not have been clobbered by any other
|
||||
// instruction between the iadd_ifcout and this instruction, as
|
||||
// verified by the CLIF validator; so we can simply use the
|
||||
// flags here.
|
||||
cond
|
||||
} else if op == Opcode::Trapif {
|
||||
let cond = if op == Opcode::Trapif {
|
||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
|
@ -1283,7 +1211,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
// Branch around the break instruction with inverted cond. Go straight to lowered
|
||||
// one-target form; this is logically part of a single-in single-out template lowering.
|
||||
let cond = cond.invert();
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
|
@ -1305,12 +1233,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::FuncAddr => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _) = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let extname = ctx.call_target(insn).unwrap().clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: Box::new(extname),
|
||||
name: extname,
|
||||
srcloc: loc,
|
||||
offset: 0,
|
||||
});
|
||||
|
@ -1322,12 +1249,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::SymbolValue => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let (extname, offset) = ctx.symbol_value(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let loc = ctx.srcloc(insn);
|
||||
ctx.emit(Inst::LoadExtName {
|
||||
rd,
|
||||
name: Box::new(extname),
|
||||
name: extname,
|
||||
srcloc: loc,
|
||||
offset,
|
||||
});
|
||||
|
@ -1335,50 +1262,54 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
|
||||
Opcode::Call | Opcode::CallIndirect => {
|
||||
let loc = ctx.srcloc(insn);
|
||||
let (mut abi, inputs) = match op {
|
||||
let (abi, inputs) = match op {
|
||||
Opcode::Call => {
|
||||
let (extname, dist) = ctx.call_target(insn).unwrap();
|
||||
let extname = ctx.call_target(insn).unwrap();
|
||||
let extname = extname.clone();
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(
|
||||
AArch64ABICall::from_func(sig, &extname, dist, loc)?,
|
||||
&inputs[..],
|
||||
)
|
||||
(AArch64ABICall::from_func(sig, &extname, loc), &inputs[..])
|
||||
}
|
||||
Opcode::CallIndirect => {
|
||||
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
|
||||
let sig = ctx.call_sig(insn).unwrap();
|
||||
assert!(inputs.len() - 1 == sig.params.len());
|
||||
assert!(outputs.len() == sig.returns.len());
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
|
||||
(AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
abi.emit_stack_pre_adjust(ctx);
|
||||
for inst in abi.gen_stack_pre_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
assert!(inputs.len() == abi.num_args());
|
||||
for (i, input) in inputs.iter().enumerate() {
|
||||
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
|
||||
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
|
||||
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
for inst in abi.gen_call().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
abi.emit_call(ctx);
|
||||
for (i, output) in outputs.iter().enumerate() {
|
||||
let retval_reg = output_to_reg(ctx, *output);
|
||||
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
|
||||
ctx.emit(abi.gen_copy_retval_to_reg(i, retval_reg));
|
||||
}
|
||||
for inst in abi.gen_stack_post_adjust().into_iter() {
|
||||
ctx.emit(inst);
|
||||
}
|
||||
abi.emit_stack_post_adjust(ctx);
|
||||
}
|
||||
|
||||
Opcode::GetPinnedReg => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
|
||||
ctx.emit(Inst::GetPinnedReg { rd });
|
||||
}
|
||||
|
||||
Opcode::SetPinnedReg => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
|
||||
ctx.emit(Inst::SetPinnedReg { rm });
|
||||
}
|
||||
|
||||
Opcode::Spill
|
||||
|
@ -1409,20 +1340,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
panic!("Branch opcode reached non-branch lowering logic!");
|
||||
}
|
||||
|
||||
Opcode::Vconst => {
|
||||
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
lower_constant_f128(ctx, rd, value);
|
||||
}
|
||||
|
||||
Opcode::RawBitcast => {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||
}
|
||||
|
||||
Opcode::Shuffle
|
||||
Opcode::Vconst
|
||||
| Opcode::Shuffle
|
||||
| Opcode::Vsplit
|
||||
| Opcode::Vconcat
|
||||
| Opcode::Vselect
|
||||
|
@ -1431,20 +1350,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::Splat
|
||||
| Opcode::Insertlane
|
||||
| Opcode::Extractlane
|
||||
| Opcode::RawBitcast
|
||||
| Opcode::ScalarToVector
|
||||
| Opcode::Swizzle
|
||||
| Opcode::Uload8x8
|
||||
| Opcode::Uload8x8Complex
|
||||
| Opcode::Sload8x8
|
||||
| Opcode::Sload8x8Complex
|
||||
| Opcode::Uload16x4
|
||||
| Opcode::Uload16x4Complex
|
||||
| Opcode::Sload16x4
|
||||
| Opcode::Sload16x4Complex
|
||||
| Opcode::Uload32x2
|
||||
| Opcode::Uload32x2Complex
|
||||
| Opcode::Sload32x2
|
||||
| Opcode::Sload32x2Complex => {
|
||||
| Opcode::Sload32x2 => {
|
||||
// TODO
|
||||
panic!("Vector ops not implemented.");
|
||||
}
|
||||
|
@ -1538,38 +1452,54 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Fcopysign => {
|
||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
||||
//
|
||||
// This is a scalar Fcopysign.
|
||||
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
||||
// (64 bits for example, 32-bit sequence is analogous):
|
||||
//
|
||||
// mov vd, vn
|
||||
// ushr vtmp, vm, #63 / #31
|
||||
// sli vd, vtmp, #63 / #31
|
||||
// MOV Xtmp1, Dinput0
|
||||
// MOV Xtmp2, Dinput1
|
||||
// AND Xtmp2, 0x8000_0000_0000_0000
|
||||
// BIC Xtmp1, 0x8000_0000_0000_0000
|
||||
// ORR Xtmp1, Xtmp1, Xtmp2
|
||||
// MOV Doutput, Xtmp1
|
||||
|
||||
let ty = ctx.output_ty(insn, 0);
|
||||
let bits = ty_bits(ty) as u8;
|
||||
let bits = ty_bits(ty);
|
||||
assert!(bits == 32 || bits == 64);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
|
||||
|
||||
// Copy LHS to rd.
|
||||
ctx.emit(Inst::FpuMove64 { rd, rn });
|
||||
|
||||
// Copy the sign bit to the lowest bit in tmp.
|
||||
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
let tmp1 = ctx.tmp(RegClass::I64, I64);
|
||||
let tmp2 = ctx.tmp(RegClass::I64, I64);
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
|
||||
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
|
||||
let imml = if bits == 32 {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000, I32).unwrap()
|
||||
} else {
|
||||
ImmLogic::maybe_from_u64(0x8000_0000_0000_0000, I64).unwrap()
|
||||
};
|
||||
let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp2,
|
||||
rn: tmp2.to_reg(),
|
||||
imml: imml.clone(),
|
||||
});
|
||||
|
||||
// Insert the bit from tmp into the sign bit of rd.
|
||||
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
||||
let alu_op = choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64);
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
imml,
|
||||
});
|
||||
let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: tmp1,
|
||||
rn: tmp1.to_reg(),
|
||||
rm: tmp2.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::MovToVec64 {
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
rn: tmp1.to_reg(),
|
||||
});
|
||||
}
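// Aside (not part of this diff): a minimal sketch of what both Fcopysign
// sequences above compute, written on plain 64-bit bit patterns. The name
// `copysign_bits_64` is illustrative only.
fn copysign_bits_64(magnitude: u64, sign_source: u64) -> u64 {
    const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
    // Clear the sign bit of the magnitude, then insert the sign bit of the
    // other operand; the AND/BIC/ORR and ushr/sli sequences do the same.
    (magnitude & !SIGN_BIT) | (sign_source & SIGN_BIT)
}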
@ -1601,14 +1531,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
} else {
|
||||
ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
|
||||
}
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
|
||||
});
|
||||
let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
|
||||
ctx.emit(Inst::Udf { trap_info });
|
||||
|
||||
let tmp = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
let tmp = ctx.tmp(RegClass::V128, I128);
|
||||
|
||||
// Check that the input is in range, with "truncate towards zero" semantics. This means
|
||||
// we allow values that are slightly out of range:
|
||||
|
@ -1642,7 +1572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1655,7 +1585,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1685,7 +1615,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
|
||||
});
|
||||
|
@ -1698,7 +1628,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
target: BranchTarget::ResolvedOffset(8),
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
|
||||
});
|
||||
|
@ -1774,8 +1704,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
|
||||
let rtmp1 = ctx.tmp(RegClass::V128, in_ty);
|
||||
let rtmp2 = ctx.tmp(RegClass::V128, in_ty);
|
||||
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::LoadFpuConst32 {
|
||||
|
@ -1860,35 +1790,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
});
|
||||
}
|
||||
|
||||
Opcode::IaddIfcout => {
|
||||
// This is a two-output instruction that is needed for the
|
||||
// legalizer's explicit heap-check sequence, among possible other
|
||||
// uses. Its second output is a flags output only ever meant to
|
||||
// check for overflow using the
|
||||
// `backend.unsigned_add_overflow_condition()` condition.
|
||||
//
|
||||
// Note that the CLIF validation will ensure that no flag-setting
|
||||
// operation comes between this IaddIfcout and its use (e.g., a
|
||||
// Trapif). Thus, we can rely on implicit communication through the
|
||||
// processor flags rather than explicitly generating flags into a
|
||||
// register. We simply use the variant of the add instruction that
|
||||
// sets flags (`adds`) here.
|
||||
|
||||
// Ensure that the second output isn't directly called for: it
|
||||
// should only be used by a flags-consuming op, which will directly
|
||||
// understand this instruction and merge the comparison.
|
||||
assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg()));
|
||||
|
||||
// Now handle the iadd as above, except use an AddS opcode that sets
|
||||
// flags.
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
}
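// Aside (not part of this diff): the condition that the flags-setting `adds`
// exposes is plain unsigned-add overflow, i.e. what a checked add reports.
// A hedged sketch; the helper name is illustrative only.
fn unsigned_add_overflows(a: u64, b: u64) -> bool {
    // Carry set on aarch64 after `adds` corresponds to the sum not fitting in 64 bits.
    a.checked_add(b).is_none()
}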
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
|
@ -1899,6 +1800,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
|
@ -1947,8 +1849,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Packss
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
|
@ -1960,16 +1860,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||
        Opcode::AvgRound => unimplemented!(),
        Opcode::TlsValue => unimplemented!(),
    }

    Ok(())
}

pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    branches: &[IRInst],
    targets: &[MachLabel],
    fallthrough: Option<MachLabel>,
) -> CodegenResult<()> {
    targets: &[BlockIndex],
    fallthrough: Option<BlockIndex>,
) {
    // A block should end with at most two branches. The first may be a
    // conditional branch; a conditional branch can be followed only by an
    // unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
|
@ -1983,14 +1881,18 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
//println!(
|
||||
// "lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
// op0, op1
|
||||
//);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let taken = BranchTarget::Block(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
Opcode::Jump => BranchTarget::Block(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Block(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let flag_input = InsnInput {
|
||||
|
@ -2050,8 +1952,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::BrIcmp => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let ty = ctx.input_ty(branches[0], 0);
|
||||
let bits = ty_bits(ty);
|
||||
|
@ -2084,15 +1984,13 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::Brif => {
|
||||
let condcode = inst_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
|
||||
let is_signed = condcode_is_signed(condcode);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
|
@ -2103,7 +2001,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
} else {
|
||||
// If the ifcmp result is actually placed in a
|
||||
|
@ -2113,7 +2011,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2121,7 +2019,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
Opcode::Brff => {
|
||||
let condcode = inst_fp_condcode(ctx.data(branches[0])).unwrap();
|
||||
let cond = lower_fp_condcode(condcode);
|
||||
let kind = CondBrKind::Cond(cond);
|
||||
let flag_input = InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
|
@ -2131,7 +2028,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
} else {
|
||||
// If the ffcmp result is actually placed in a
|
||||
|
@ -2141,7 +2038,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
ctx.emit(Inst::CondBr {
|
||||
taken,
|
||||
not_taken,
|
||||
kind,
|
||||
kind: CondBrKind::Cond(cond),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2158,15 +2055,12 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// fills in `targets[0]` with our fallthrough block, so this
|
||||
// is valid for both Jump and Fallthrough.
|
||||
ctx.emit(Inst::Jump {
|
||||
dest: BranchTarget::Label(targets[0]),
|
||||
dest: BranchTarget::Block(targets[0]),
|
||||
});
|
||||
}
|
||||
Opcode::BrTable => {
|
||||
// Expand `br_table index, default, JT` to:
|
||||
//
|
||||
// emit_island // this forces an island at this point
|
||||
// // if the jumptable would push us past
|
||||
// // the deadline
|
||||
// subs idx, #jt_size
|
||||
// b.hs default
|
||||
// adr vTmp1, PC+16
|
||||
|
@ -2176,11 +2070,6 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
// [jumptable offsets relative to JT base]
|
||||
let jt_size = targets.len() - 1;
|
||||
assert!(jt_size <= std::u32::MAX as usize);
|
||||
|
||||
ctx.emit(Inst::EmitIsland {
|
||||
needed_space: 4 * (6 + jt_size) as CodeOffset,
|
||||
});
|
||||
|
||||
let ridx = input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
|
@ -2190,8 +2079,8 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
NarrowValueMode::ZeroExtend32,
|
||||
);
|
||||
|
||||
let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rtmp1 = ctx.tmp(RegClass::I64, I32);
|
||||
let rtmp2 = ctx.tmp(RegClass::I64, I32);
|
||||
|
||||
// Bounds-check and branch to default.
|
||||
if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
|
||||
|
@ -2210,10 +2099,10 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
}
|
||||
let default_target = BranchTarget::Label(targets[0]);
|
||||
ctx.emit(Inst::OneWayCondBr {
|
||||
target: default_target.clone(),
|
||||
let default_target = BranchTarget::Block(targets[0]);
|
||||
ctx.emit(Inst::CondBrLowered {
|
||||
kind: CondBrKind::Cond(Cond::Hs), // unsigned >=
|
||||
target: default_target.clone(),
|
||||
});
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
|
@ -2234,23 +2123,19 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
|
|||
let jt_targets: Vec<BranchTarget> = targets
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.map(|bix| BranchTarget::Block(*bix))
|
||||
.collect();
|
||||
let targets_for_term: Vec<MachLabel> = targets.to_vec();
|
||||
let targets_for_term: Vec<BlockIndex> = targets.to_vec();
|
||||
ctx.emit(Inst::JTSequence {
|
||||
ridx,
|
||||
rtmp1,
|
||||
rtmp2,
|
||||
info: Box::new(JTSequenceInfo {
|
||||
targets: jt_targets,
|
||||
targets_for_term: targets_for_term,
|
||||
}),
|
||||
targets: jt_targets,
|
||||
targets_for_term,
|
||||
});
|
||||
}
|
||||
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,6 +1,5 @@
|
|||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{
|
||||
|
@ -16,7 +15,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, Triple};
|
|||
|
||||
// New backend:
|
||||
mod abi;
|
||||
pub(crate) mod inst;
|
||||
mod inst;
|
||||
mod lower;
|
||||
mod lower_inst;
|
||||
|
||||
|
@ -26,18 +25,12 @@ use inst::create_reg_universe;
|
|||
pub struct AArch64Backend {
    triple: Triple,
    flags: settings::Flags,
    reg_universe: RealRegUniverse,
}

impl AArch64Backend {
    /// Create a new AArch64 backend with the given (shared) flags.
    pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
        let reg_universe = create_reg_universe(&flags);
        AArch64Backend {
            triple,
            flags,
            reg_universe,
        }
        AArch64Backend { triple, flags }
    }
|
||||
|
||||
/// This performs lowering to VCode, register-allocates the code, computes block layout and
|
||||
|
@ -47,7 +40,7 @@ impl AArch64Backend {
|
|||
func: &Function,
|
||||
flags: settings::Flags,
|
||||
) -> CodegenResult<VCode<inst::Inst>> {
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
|
||||
let abi = Box::new(abi::AArch64ABIBody::new(func, flags));
|
||||
compile::compile::<AArch64Backend>(func, self, abi)
|
||||
}
|
||||
}
|
||||
|
@ -60,7 +53,7 @@ impl MachBackend for AArch64Backend {
|
|||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let sections = vcode.emit();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
|
@ -69,10 +62,8 @@ impl MachBackend for AArch64Backend {
|
|||
None
|
||||
};
|
||||
|
||||
let buffer = buffer.finish();
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
sections,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
|
@ -90,21 +81,8 @@ impl MachBackend for AArch64Backend {
|
|||
&self.flags
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on
|
||||
// underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the
|
||||
// opposite of x86).
|
||||
IntCC::UnsignedLessThan
|
||||
fn reg_universe(&self) -> RealRegUniverse {
|
||||
create_reg_universe(&self.flags)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,8 +134,8 @@ mod test {
|
|||
Triple::from_str("aarch64").unwrap(),
|
||||
settings::Flags::new(shared_flags),
|
||||
);
|
||||
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
|
||||
let code = &buffer.data[..];
|
||||
let sections = backend.compile_function(&mut func, false).unwrap().sections;
|
||||
let code = §ions.sections[0].data;
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
|
@ -171,7 +149,7 @@ mod test {
|
|||
0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
assert_eq!(code, &golden);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -214,32 +192,34 @@ mod test {
|
|||
let result = backend
|
||||
.compile_function(&mut func, /* want_disasm = */ false)
|
||||
.unwrap();
|
||||
let code = &result.buffer.data[..];
|
||||
let code = &result.sections.sections[0].data;
|
||||
|
||||
// stp x29, x30, [sp, #-16]!
|
||||
// mov x29, sp
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w0, w0, w1
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x28
|
||||
// mov x1, #0x1234 // #4660
|
||||
// add w1, w0, w1
|
||||
// mov w1, w1
|
||||
// cbnz x1, 0x18
|
||||
// mov w1, w0
|
||||
// cbnz x1, 0x18
|
||||
// mov x1, #0x1234 // #4660
|
||||
// sub w0, w0, w1
|
||||
// mov x1, x0
|
||||
// mov x0, #0x1234
|
||||
// add w1, w1, w0
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// mov w2, w1
|
||||
// cbz x2, ...
|
||||
// sub w0, w1, w0
|
||||
// mov sp, x29
|
||||
// ldp x29, x30, [sp], #16
|
||||
// ret
|
||||
// add w2, w1, w0
|
||||
// mov w2, w2
|
||||
// cbnz x2, ... <---- compound branch (cond / uncond)
|
||||
// b ... <----
|
||||
|
||||
let golden = vec![
|
||||
253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
|
||||
0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
|
||||
0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
|
||||
193, 168, 192, 3, 95, 214,
|
||||
0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0xe1, 0x03, 0x00, 0xaa, 0x80, 0x46,
|
||||
0x82, 0xd2, 0x21, 0x00, 0x00, 0x0b, 0xe2, 0x03, 0x01, 0x2a, 0xe2, 0x00, 0x00, 0xb4,
|
||||
0xe2, 0x03, 0x01, 0x2a, 0xa2, 0x00, 0x00, 0xb5, 0x20, 0x00, 0x00, 0x4b, 0xbf, 0x03,
|
||||
0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6, 0x22, 0x00, 0x00, 0x0b,
|
||||
0xe2, 0x03, 0x02, 0x2a, 0xc2, 0xff, 0xff, 0xb5, 0xf7, 0xff, 0xff, 0x17,
|
||||
];
|
||||
|
||||
assert_eq!(code, &golden[..]);
|
||||
assert_eq!(code, &golden);
|
||||
}
|
||||
}
|
@ -17,7 +17,6 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{Architecture, Triple};
|
||||
|
||||
|
@ -136,10 +135,6 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -66,7 +66,6 @@ use crate::settings::SetResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
|
||||
|
@ -78,14 +77,11 @@ mod riscv;
|
|||
#[cfg(feature = "x86")]
|
||||
mod x86;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
mod x64;
|
||||
|
||||
#[cfg(feature = "arm32")]
|
||||
mod arm32;
|
||||
|
||||
#[cfg(feature = "arm64")]
|
||||
pub(crate) mod aarch64;
|
||||
mod aarch64;
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
pub mod unwind;
|
||||
|
@ -423,10 +419,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
|||
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait
|
||||
/// with `isa.as_any().downcast_ref::<isa::foo::Isa>()`.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
impl Debug for &dyn TargetIsa {
|
||||
|
|
|
@ -17,7 +17,6 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
|||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -131,10 +130,6 @@ impl TargetIsa for Isa {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -168,7 +163,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -181,7 +176,7 @@ mod tests {
|
|||
);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -191,7 +186,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV64.
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
@ -219,7 +214,7 @@ mod tests {
|
|||
let arg32 = func.dfg.append_block_param(block, types::I32);
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10.
|
||||
let inst64 = InstructionData::BinaryImm64 {
|
||||
let inst64 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10),
|
||||
|
@ -229,7 +224,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64, types::I64).is_err());
|
||||
|
||||
// Try to encode iadd_imm.i64 v1, -10000.
|
||||
let inst64_large = InstructionData::BinaryImm64 {
|
||||
let inst64_large = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg64,
|
||||
imm: immediates::Imm64::new(-10000),
|
||||
|
@ -239,7 +234,7 @@ mod tests {
|
|||
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
|
||||
|
||||
// Create an iadd_imm.i32 which is encodable in RV32.
|
||||
let inst32 = InstructionData::BinaryImm64 {
|
||||
let inst32 = InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg: arg32,
|
||||
imm: immediates::Imm64::new(10),
|
||||
|
|
|
@ -8,6 +8,7 @@ use thiserror::Error;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
type Register = u16;
|
||||
type Expression = Vec<u8>;
|
||||
|
||||
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
|
||||
#[allow(missing_docs)]
|
||||
|
@ -22,8 +23,6 @@ pub enum RegisterMappingError {
|
|||
}
|
||||
|
||||
// This mirrors gimli's CallFrameInstruction, but is serializable
|
||||
// This excludes CfaExpression, Expression, ValExpression due to
|
||||
// https://github.com/gimli-rs/gimli/issues/513.
|
||||
// TODO: if gimli ever adds serialization support, remove this type
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
|
@ -31,12 +30,15 @@ pub(crate) enum CallFrameInstruction {
|
|||
    Cfa(Register, i32),
    CfaRegister(Register),
    CfaOffset(i32),
    CfaExpression(Expression),
    Restore(Register),
    Undefined(Register),
    SameValue(Register),
    Offset(Register, i32),
    ValOffset(Register, i32),
    Register(Register, Register),
    Expression(Register, Expression),
    ValExpression(Register, Expression),
    RememberState,
    RestoreState,
    ArgsSize(u32),
|
||||
|
@ -50,33 +52,34 @@ impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
|
||||
CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
|
||||
CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
|
||||
CallFrameInstruction::CfaExpression(expr) => Self::CfaExpression(expr.0),
|
||||
CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
|
||||
CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
|
||||
CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
|
||||
CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
|
||||
CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
|
||||
CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
|
||||
CallFrameInstruction::Expression(reg, expr) => Self::Expression(reg.0, expr.0),
|
||||
CallFrameInstruction::ValExpression(reg, expr) => Self::ValExpression(reg.0, expr.0),
|
||||
CallFrameInstruction::RememberState => Self::RememberState,
|
||||
CallFrameInstruction::RestoreState => Self::RestoreState,
|
||||
CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
|
||||
_ => {
|
||||
// Cranelift's unwind support does not generate `CallFrameInstruction`s with
|
||||
// Expression at this moment, and it is not trivial to
|
||||
// serialize such instructions.
|
||||
panic!("CallFrameInstruction with Expression not supported");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
||||
fn into(self) -> gimli::write::CallFrameInstruction {
|
||||
use gimli::{write::CallFrameInstruction, Register};
|
||||
use gimli::{
|
||||
write::{CallFrameInstruction, Expression},
|
||||
Register,
|
||||
};
|
||||
|
||||
match self {
|
||||
Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
|
||||
Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
|
||||
Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
|
||||
Self::CfaExpression(expr) => CallFrameInstruction::CfaExpression(Expression(expr)),
|
||||
Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
|
||||
Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
|
||||
Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
|
||||
|
@ -85,6 +88,12 @@ impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
|
|||
Self::Register(reg1, reg2) => {
|
||||
CallFrameInstruction::Register(Register(reg1), Register(reg2))
|
||||
}
|
||||
Self::Expression(reg, expr) => {
|
||||
CallFrameInstruction::Expression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::ValExpression(reg, expr) => {
|
||||
CallFrameInstruction::ValExpression(Register(reg), Expression(expr))
|
||||
}
|
||||
Self::RememberState => CallFrameInstruction::RememberState,
|
||||
Self::RestoreState => CallFrameInstruction::RestoreState,
|
||||
Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
|
||||
|
|
|
@ -1,467 +0,0 @@
|
|||
//! Implementation of the standard x64 ABI.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
|
||||
use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
|
||||
use crate::isa::{self, x64::inst::*};
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
use args::*;
|
||||
|
||||
#[derive(Clone, Debug)]
enum ABIArg {
    Reg(RealReg),
    _Stack,
}

#[derive(Clone, Debug)]
enum ABIRet {
    Reg(RealReg),
    _Stack,
}
|
||||
|
||||
pub(crate) struct X64ABIBody {
|
||||
args: Vec<ABIArg>,
|
||||
rets: Vec<ABIRet>,
|
||||
|
||||
/// Offsets to each stack slot.
|
||||
_stack_slots: Vec<usize>,
|
||||
|
||||
/// Total stack size of all the stack slots.
|
||||
stack_slots_size: usize,
|
||||
|
||||
/// Clobbered registers, as indicated by regalloc.
|
||||
clobbered: Set<Writable<RealReg>>,
|
||||
|
||||
/// Total number of spill slots, as indicated by regalloc.
|
||||
num_spill_slots: Option<usize>,
|
||||
|
||||
/// Calculated while creating the prologue, and used when creating the epilogue. Amount by
|
||||
/// which RSP is adjusted downwards to allocate the spill area.
|
||||
frame_size_bytes: Option<usize>,
|
||||
|
||||
call_conv: isa::CallConv,
|
||||
|
||||
/// The settings controlling this function's compilation.
|
||||
flags: settings::Flags,
|
||||
}
|
||||
|
||||
fn in_int_reg(ty: types::Type) -> bool {
|
||||
match ty {
|
||||
types::I8
|
||||
| types::I16
|
||||
| types::I32
|
||||
| types::I64
|
||||
| types::B1
|
||||
| types::B8
|
||||
| types::B16
|
||||
| types::B32
|
||||
| types::B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rdi()),
|
||||
1 => Some(regs::rsi()),
|
||||
2 => Some(regs::rdx()),
|
||||
3 => Some(regs::rcx()),
|
||||
4 => Some(regs::r8()),
|
||||
5 => Some(regs::r9()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::rax()),
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_callee_save_systemv(r: RealReg) -> bool {
|
||||
use regs::*;
|
||||
match r.get_class() {
|
||||
RegClass::I64 => match r.get_hw_encoding() as u8 {
|
||||
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
|
||||
_ => false,
|
||||
},
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
|
||||
regs.into_iter()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl X64ABIBody {
|
||||
/// Create a new body ABI instance.
|
||||
pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> Self {
|
||||
// Compute args and retvals from signature.
|
||||
let mut args = vec![];
|
||||
let mut next_int_arg = 0;
|
||||
for param in &f.signature.params {
|
||||
match param.purpose {
|
||||
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
|
||||
// `VMContext` is `r14` in Baldrdash.
|
||||
args.push(ABIArg::Reg(regs::r14().to_real_reg()));
|
||||
}
|
||||
|
||||
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
|
||||
if in_int_reg(param.value_type) {
|
||||
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
|
||||
args.push(ABIArg::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing arg on the stack");
|
||||
}
|
||||
next_int_arg += 1;
|
||||
} else {
|
||||
unimplemented!("non int normal register")
|
||||
}
|
||||
}
|
||||
|
||||
_ => unimplemented!("other parameter purposes"),
|
||||
}
|
||||
}
|
||||
|
||||
let mut rets = vec![];
|
||||
let mut next_int_retval = 0;
|
||||
for ret in &f.signature.returns {
|
||||
match ret.purpose {
|
||||
ir::ArgumentPurpose::Normal => {
|
||||
if in_int_reg(ret.value_type) {
|
||||
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
|
||||
rets.push(ABIRet::Reg(reg.to_real_reg()));
|
||||
} else {
|
||||
unimplemented!("passing return on the stack");
|
||||
}
|
||||
next_int_retval += 1;
|
||||
} else {
|
||||
unimplemented!("returning non integer normal value");
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
unimplemented!("non normal argument purpose");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute stackslot locations and total stackslot size.
|
||||
let mut stack_offset: usize = 0;
|
||||
let mut _stack_slots = vec![];
|
||||
for (stackslot, data) in f.stack_slots.iter() {
|
||||
let off = stack_offset;
|
||||
stack_offset += data.size as usize;
|
||||
|
||||
// 8-bit align.
|
||||
stack_offset = (stack_offset + 7) & !7usize;
|
||||
|
||||
debug_assert_eq!(stackslot.as_u32() as usize, _stack_slots.len());
|
||||
_stack_slots.push(off);
|
||||
}
|
||||
|
||||
Self {
|
||||
args,
|
||||
rets,
|
||||
_stack_slots,
|
||||
stack_slots_size: stack_offset,
|
||||
clobbered: Set::empty(),
|
||||
num_spill_slots: None,
|
||||
frame_size_bytes: None,
|
||||
call_conv: f.signature.call_conv.clone(),
|
||||
flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ABIBody for X64ABIBody {
|
||||
type I = Inst;
|
||||
|
||||
fn temp_needed(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn init(&mut self, _: Option<Writable<Reg>>) {}
|
||||
|
||||
fn flags(&self) -> &settings::Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn num_args(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_retvals(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn num_stackslots(&self) -> usize {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for arg in &self.args {
|
||||
if let &ABIArg::Reg(r) = arg {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn liveouts(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for ret in &self.rets {
|
||||
if let &ABIRet::Reg(r) = ret {
|
||||
set.insert(r);
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable<Reg>) -> Inst {
|
||||
match &self.args[idx] {
|
||||
ABIArg::Reg(from_reg) => {
|
||||
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
|
||||
// TODO do we need a sign extension if it's I32?
|
||||
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
|
||||
}
|
||||
unimplemented!("moving from non-int arg to vreg");
|
||||
}
|
||||
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_retval_area_setup(&self) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
idx: usize,
|
||||
from_reg: Writable<Reg>,
|
||||
ext: ArgumentExtension,
|
||||
) -> Vec<Inst> {
|
||||
match ext {
|
||||
ArgumentExtension::None => {}
|
||||
_ => unimplemented!(
|
||||
"unimplemented argument extension {:?} is required for baldrdash",
|
||||
ext
|
||||
),
|
||||
};
|
||||
|
||||
let mut ret = Vec::new();
|
||||
match &self.rets[idx] {
|
||||
ABIRet::Reg(to_reg) => {
|
||||
if to_reg.get_class() == RegClass::I32 || to_reg.get_class() == RegClass::I64 {
|
||||
ret.push(Inst::mov_r_r(
|
||||
/*is64=*/ true,
|
||||
from_reg.to_reg(),
|
||||
Writable::<Reg>::from_reg(to_reg.to_reg()),
|
||||
))
|
||||
} else {
|
||||
unimplemented!("moving from vreg to non-int return value");
|
||||
}
|
||||
}
|
||||
|
||||
ABIRet::_Stack => {
|
||||
unimplemented!("moving from vreg to stack return value");
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_ret(&self) -> Inst {
|
||||
Inst::ret()
|
||||
}
|
||||
|
||||
fn gen_epilogue_placeholder(&self) -> Inst {
|
||||
Inst::epilogue_placeholder()
|
||||
}
|
||||
|
||||
fn set_num_spillslots(&mut self, slots: usize) {
|
||||
self.num_spill_slots = Some(slots);
|
||||
}
|
||||
|
||||
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
|
||||
self.clobbered = clobbered;
|
||||
}
|
||||
|
||||
fn stackslot_addr(&self, _slot: StackSlot, _offset: u32, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn load_stackslot(
|
||||
&self,
|
||||
_slot: StackSlot,
|
||||
_offset: u32,
|
||||
_ty: Type,
|
||||
_into_reg: Writable<Reg>,
|
||||
) -> Inst {
|
||||
unimplemented!("load_stackslot")
|
||||
}
|
||||
|
||||
fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_stackslot")
|
||||
}
|
||||
|
||||
fn load_spillslot(&self, _slot: SpillSlot, _ty: Type, _into_reg: Writable<Reg>) -> Inst {
|
||||
unimplemented!("load_spillslot")
|
||||
}
|
||||
|
||||
fn store_spillslot(&self, _slot: SpillSlot, _ty: Type, _from_reg: Reg) -> Inst {
|
||||
unimplemented!("store_spillslot")
|
||||
}
|
||||
|
||||
fn gen_prologue(&mut self) -> Vec<Inst> {
|
||||
let r_rsp = regs::rsp();
|
||||
|
||||
let mut insts = vec![];
|
||||
|
||||
// Baldrdash generates its own prologue sequence, so we don't have to.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// The "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::push64(RMI::reg(r_rbp)));
|
||||
// RSP is now 0 % 16
|
||||
insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
|
||||
}
|
||||
|
||||
// Save callee saved registers that we trash. Keep track of how much space we've used, so
|
||||
// as to know what we have to do to get the base of the spill area 0 % 16.
|
||||
let mut callee_saved_used = 0;
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for reg in clobbered {
|
||||
let r_reg = reg.to_reg();
|
||||
match r_reg.get_class() {
|
||||
RegClass::I64 => {
|
||||
insts.push(Inst::push64(RMI::reg(r_reg.to_reg())));
|
||||
callee_saved_used += 8;
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap();
|
||||
if self.call_conv.extends_baldrdash() {
|
||||
// Baldrdash expects the stack to take at least the number of words set in
|
||||
// baldrdash_prologue_words; count them here.
|
||||
debug_assert!(
|
||||
!self.flags.enable_probestack(),
|
||||
"baldrdash does not expect cranelift to emit stack probes"
|
||||
);
|
||||
total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8;
|
||||
}
|
||||
|
||||
debug_assert!(callee_saved_used % 16 == 0 || callee_saved_used % 16 == 8);
|
||||
let frame_size = total_stacksize + callee_saved_used % 16;
|
||||
|
||||
// Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body.
|
||||
let frame_size = (frame_size + 15) & !15;
|
||||
if frame_size > 0x7FFF_FFFF {
|
||||
unimplemented!("gen_prologue(x86): total_stacksize >= 2G");
|
||||
}
|
||||
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
// Explicitly allocate the frame.
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
if frame_size > 0 {
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Sub,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Stash this value. We'll need it for the epilogue.
|
||||
debug_assert!(self.frame_size_bytes.is_none());
|
||||
self.frame_size_bytes = Some(frame_size);
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_epilogue(&self) -> Vec<Inst> {
|
||||
let mut insts = vec![];
|
||||
|
||||
// Undo what we did in the prologue.
|
||||
|
||||
// Clear the spill area and the 16-alignment padding below it.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let frame_size = self.frame_size_bytes.unwrap();
|
||||
if frame_size > 0 {
|
||||
let r_rsp = regs::rsp();
|
||||
let w_rsp = Writable::<Reg>::from_reg(r_rsp);
|
||||
|
||||
insts.push(Inst::alu_rmi_r(
|
||||
true,
|
||||
RMI_R_Op::Add,
|
||||
RMI::imm(frame_size as u32),
|
||||
w_rsp,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Restore regs.
|
||||
let clobbered = get_callee_saves(self.clobbered.to_vec());
|
||||
for w_real_reg in clobbered.into_iter().rev() {
|
||||
match w_real_reg.to_reg().get_class() {
|
||||
RegClass::I64 => {
|
||||
// TODO: make these conversion sequences less cumbersome.
|
||||
insts.push(Inst::pop64(Writable::<Reg>::from_reg(
|
||||
w_real_reg.to_reg().to_reg(),
|
||||
)))
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
// Baldrdash generates its own preamble.
|
||||
if !self.call_conv.extends_baldrdash() {
|
||||
let r_rbp = regs::rbp();
|
||||
let w_rbp = Writable::<Reg>::from_reg(r_rbp);
|
||||
|
||||
// Undo the "traditional" pre-preamble
|
||||
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
|
||||
insts.push(Inst::pop64(w_rbp));
|
||||
insts.push(Inst::ret());
|
||||
}
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn frame_size(&self) -> u32 {
|
||||
self.frame_size_bytes
|
||||
.expect("frame size not computed before prologue generation") as u32
|
||||
}
|
||||
|
||||
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
(RegClass::I64, _) => 1,
|
||||
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
|
||||
(RegClass::V128, _) => 2,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_spill(&self, _to_slot: SpillSlot, _from_reg: RealReg, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_reload(&self, _to_reg: Writable<RealReg>, _from_slot: SpillSlot, _ty: Type) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
|
@ -1,420 +0,0 @@
|
|||
//! Instruction operand sub-components (aka "parts"): definitions and printing.
|
||||
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
|
||||
|
||||
use crate::machinst::*;
|
||||
|
||||
use super::regs::show_ireg_sized;
|
||||
|
||||
/// A Memory Address. These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
    /// Immediate sign-extended and a Register.
    IR { simm32: u32, base: Reg },

    /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
    IRRS {
        simm32: u32,
        base: Reg,
        index: Reg,
        shift: u8, /* 0 .. 3 only */
    },
}
|
||||
|
||||
impl Addr {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
Self::IR { simm32, base }
|
||||
}
|
||||
|
||||
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
debug_assert!(index.get_class() == RegClass::I64);
|
||||
debug_assert!(shift <= 3);
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
Addr::IR { simm32: _, base } => {
|
||||
collector.add_use(*base);
|
||||
}
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
base,
|
||||
index,
|
||||
shift: _,
|
||||
} => {
|
||||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for Addr {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
|
||||
Addr::IRRS {
|
||||
simm32,
|
||||
base,
|
||||
index,
|
||||
shift,
|
||||
} => format!(
|
||||
"{}({},{},{})",
|
||||
*simm32 as i32,
|
||||
base.show_rru(mb_rru),
|
||||
index.show_rru(mb_rru),
|
||||
1 << shift
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
|
||||
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
|
||||
/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
|
||||
/// `simm32` is its sign-extension out to 64 bits.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RMI {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
I { simm32: u32 },
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
// Constructors
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> RMI {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RMI::R { reg }
|
||||
}
|
||||
pub(crate) fn mem(addr: Addr) -> RMI {
|
||||
RMI::M { addr }
|
||||
}
|
||||
pub(crate) fn imm(simm32: u32) -> RMI {
|
||||
RMI::I { simm32 }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RMI::R { reg } => collector.add_use(*reg),
|
||||
RMI::M { addr } => addr.get_regs_as_uses(collector),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RMI {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RMI::M { addr } => addr.show_rru(mb_rru),
|
||||
RMI::I { simm32 } => format!("${}", *simm32 as i32),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
|
||||
/// 32 or 64 bit value.
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum RM {
|
||||
R { reg: Reg },
|
||||
M { addr: Addr },
|
||||
}
|
||||
|
||||
impl RM {
|
||||
// Constructors.
|
||||
|
||||
pub(crate) fn reg(reg: Reg) -> Self {
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
RM::R { reg }
|
||||
}
|
||||
|
||||
pub(crate) fn mem(addr: Addr) -> Self {
|
||||
RM::M { addr }
|
||||
}
|
||||
|
||||
/// Add the regs mentioned by `self` to `collector`.
|
||||
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
|
||||
match self {
|
||||
RM::R { reg } => collector.add_use(*reg),
|
||||
RM::M { addr } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RM {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.show_rru_sized(mb_rru, 8)
|
||||
}
|
||||
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
match self {
|
||||
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
|
||||
RM::M { addr } => addr.show_rru(mb_rru),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum RMI_R_Op {
|
||||
Add,
|
||||
Sub,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
||||
Mul,
|
||||
}
|
||||
|
||||
impl RMI_R_Op {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
RMI_R_Op::Add => "add".to_string(),
|
||||
RMI_R_Op::Sub => "sub".to_string(),
|
||||
RMI_R_Op::And => "and".to_string(),
|
||||
RMI_R_Op::Or => "or".to_string(),
|
||||
RMI_R_Op::Xor => "xor".to_string(),
|
||||
RMI_R_Op::Mul => "imul".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RMI_R_Op {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate ways of extending (widening) a value, using the Intel naming:
|
||||
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum ExtMode {
|
||||
/// Byte -> Longword.
|
||||
BL,
|
||||
/// Byte -> Quadword.
|
||||
BQ,
|
||||
/// Word -> Longword.
|
||||
WL,
|
||||
/// Word -> Quadword.
|
||||
WQ,
|
||||
/// Longword -> Quadword.
|
||||
LQ,
|
||||
}
|
||||
|
||||
impl ExtMode {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ExtMode::BL => "bl".to_string(),
|
||||
ExtMode::BQ => "bq".to_string(),
|
||||
ExtMode::WL => "wl".to_string(),
|
||||
ExtMode::WQ => "wq".to_string(),
|
||||
ExtMode::LQ => "lq".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn dst_size(&self) -> u8 {
|
||||
match self {
|
||||
ExtMode::BL => 4,
|
||||
ExtMode::BQ => 8,
|
||||
ExtMode::WL => 4,
|
||||
ExtMode::WQ => 8,
|
||||
ExtMode::LQ => 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ExtMode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate the form of a scalar shift: left, signed right, unsigned right.
|
||||
#[derive(Clone)]
|
||||
pub enum ShiftKind {
|
||||
Left,
|
||||
RightZ,
|
||||
RightS,
|
||||
}
|
||||
|
||||
impl ShiftKind {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
ShiftKind::Left => "shl".to_string(),
|
||||
ShiftKind::RightZ => "shr".to_string(),
|
||||
ShiftKind::RightS => "sar".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ShiftKind {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// These indicate condition code tests. Not all are represented since not all are useful in
|
||||
/// compiler-generated code.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(u8)]
|
||||
pub enum CC {
|
||||
/// overflow
|
||||
O = 0,
|
||||
/// no overflow
|
||||
NO = 1,
|
||||
|
||||
/// < unsigned
|
||||
B = 2,
|
||||
/// >= unsigned
|
||||
NB = 3,
|
||||
|
||||
/// zero
|
||||
Z = 4,
|
||||
/// not-zero
|
||||
NZ = 5,
|
||||
|
||||
/// <= unsigned
|
||||
BE = 6,
|
||||
/// > unsigned
|
||||
NBE = 7,
|
||||
|
||||
/// negative
|
||||
S = 8,
|
||||
/// not-negative
|
||||
NS = 9,
|
||||
|
||||
/// < signed
|
||||
L = 12,
|
||||
/// >= signed
|
||||
NL = 13,
|
||||
|
||||
/// <= signed
|
||||
LE = 14,
|
||||
/// > signed
|
||||
NLE = 15,
|
||||
}
|
||||
|
||||
impl CC {
|
||||
pub(crate) fn to_string(&self) -> String {
|
||||
match self {
|
||||
CC::O => "o".to_string(),
|
||||
CC::NO => "no".to_string(),
|
||||
CC::B => "b".to_string(),
|
||||
CC::NB => "nb".to_string(),
|
||||
CC::Z => "z".to_string(),
|
||||
CC::NZ => "nz".to_string(),
|
||||
CC::BE => "be".to_string(),
|
||||
CC::NBE => "nbe".to_string(),
|
||||
CC::S => "s".to_string(),
|
||||
CC::NS => "ns".to_string(),
|
||||
CC::L => "l".to_string(),
|
||||
CC::NL => "nl".to_string(),
|
||||
CC::LE => "le".to_string(),
|
||||
CC::NLE => "nle".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn invert(&self) -> CC {
|
||||
match self {
|
||||
CC::O => CC::NO,
|
||||
CC::NO => CC::O,
|
||||
|
||||
CC::B => CC::NB,
|
||||
CC::NB => CC::B,
|
||||
|
||||
CC::Z => CC::NZ,
|
||||
CC::NZ => CC::Z,
|
||||
|
||||
CC::BE => CC::NBE,
|
||||
CC::NBE => CC::BE,
|
||||
|
||||
CC::S => CC::NS,
|
||||
CC::NS => CC::S,
|
||||
|
||||
CC::L => CC::NL,
|
||||
CC::NL => CC::L,
|
||||
|
||||
CC::LE => CC::NLE,
|
||||
CC::NLE => CC::LE,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_enc(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for CC {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(fmt, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a MachLabel.
|
||||
Label(MachLabel),
|
||||
|
||||
/// A resolved reference to another instruction, in bytes.
|
||||
ResolvedOffset(isize),
|
||||
}
|
||||
|
||||
impl ShowWithRRU for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
BranchTarget::Label(l) => format!("{:?}", l),
|
||||
BranchTarget::ResolvedOffset(offs) => format!("(offset {})", offs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Get the label.
|
||||
pub fn as_label(&self) -> Option<MachLabel> {
|
||||
match self {
|
||||
&BranchTarget::Label(l) => Some(l),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the offset as a signed 32 bit byte offset. This returns the
|
||||
/// offset in bytes between the first byte of the source and the first
|
||||
/// byte of the target. It does not take into account the Intel-specific
|
||||
/// rule that a branch offset is encoded as relative to the start of the
|
||||
/// following instruction. That is a problem for the emitter to deal
|
||||
/// with. If a label, returns zero.
|
||||
pub fn as_offset32_or_zero(&self) -> i32 {
|
||||
match self {
|
||||
&BranchTarget::ResolvedOffset(off) => {
|
||||
// Leave a bit of slack so that the emitter is guaranteed to
|
||||
// be able to add the length of the jump instruction encoding
|
||||
// to this value and still have a value in signed-32 range.
|
||||
assert!(off >= -0x7FFF_FF00 && off <= 0x7FFF_FF00);
|
||||
off as i32
|
||||
}
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,892 +0,0 @@
|
|||
use regalloc::{Reg, RegClass};
|
||||
|
||||
use crate::isa::x64::inst::*;
|
||||
|
||||
fn low8willSXto64(x: u32) -> bool {
    let xs = (x as i32) as i64;
    xs == ((xs << 56) >> 56)
}

fn low8willSXto32(x: u32) -> bool {
    let xs = x as i32;
    xs == ((xs << 24) >> 24)
}
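// Aside (not part of this diff): the shift trick above asks whether the value
// round-trips through i8, i.e. whether its low 8 bits sign-extend back to the
// original. An equivalent formulation, with an illustrative name:
fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let xs = x as i32;
    xs == (xs as i8) as i32
}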
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: emission
|
||||
|
||||
// For all of the routines that take both a memory-or-reg operand (sometimes
|
||||
// called "E" in the Intel documentation) and a reg-only operand ("G" in
|
||||
// Intelese), the order is always G first, then E.
|
||||
//
|
||||
// "enc" in the following means "hardware register encoding number".
|
||||
|
||||
#[inline(always)]
|
||||
fn mkModRegRM(m0d: u8, encRegG: u8, rmE: u8) -> u8 {
|
||||
debug_assert!(m0d < 4);
|
||||
debug_assert!(encRegG < 8);
|
||||
debug_assert!(rmE < 8);
|
||||
((m0d & 3) << 6) | ((encRegG & 7) << 3) | (rmE & 7)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
|
||||
debug_assert!(shift < 4);
|
||||
debug_assert!(encIndex < 8);
|
||||
debug_assert!(encBase < 8);
|
||||
((shift & 3) << 6) | ((encIndex & 7) << 3) | (encBase & 7)
|
||||
}
|
||||
|
||||
/// Get the encoding number from something which we sincerely hope is a real
|
||||
/// register of class I64.
|
||||
#[inline(always)]
|
||||
fn iregEnc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert!(reg.get_class() == RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
// F_*: these flags describe special handling of the insn to be generated. Be
|
||||
// careful with these. It is easy to create nonsensical combinations.
|
||||
const F_NONE: u32 = 0;
|
||||
|
||||
/// Emit the REX prefix byte even if it appears to be redundant (== 0x40).
|
||||
const F_RETAIN_REDUNDANT_REX: u32 = 1;
|
||||
|
||||
/// Set the W bit in the REX prefix to zero. By default it will be set to 1,
|
||||
/// indicating a 64-bit operation.
|
||||
const F_CLEAR_REX_W: u32 = 2;
|
||||
|
||||
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
|
||||
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
|
||||
const F_PREFIX_66: u32 = 4;
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a register `encG` and a memory
|
||||
/// address `memE`, create and emit, first the REX prefix, then caller-supplied
|
||||
/// opcode byte(s) (`opcodes` and `numOpcodes`), then the MOD/RM byte, then
|
||||
/// optionally, a SIB byte, and finally optionally an immediate that will be
|
||||
/// derived from the `memE` operand. For most instructions up to and including
|
||||
/// SSE4.2, that will be the whole instruction.
|
||||
///
|
||||
/// The opcodes are written bigendianly for the convenience of callers. For
|
||||
/// example, if the opcode bytes to be emitted are, in this order, F3 0F 27,
|
||||
/// then the caller should pass `opcodes` == 0xF3_0F_27 and `numOpcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware
|
||||
/// encoding, `encG`. `flags` can specify special handling for the REX prefix.
|
||||
/// By default, the REX prefix will indicate a 64-bit operation and will be
|
||||
/// deleted if it is redundant (0x40). Note that for a 64-bit operation, the
|
||||
/// REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    memE: &Addr,
    flags: u32,
) {
    // General comment for this function: the registers in `memE` must be
    // 64-bit integer registers, because they are part of an address
    // expression. But `encG` can be derived from a register of any class.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override, if requested. This indicates a 16-bit
    // operation.
    if prefix66 {
        sink.put1(0x66);
    }
    match memE {
        Addr::IR { simm32, base: regE } => {
            // First, cook up the REX byte. This is easy.
            let encE = iregEnc(*regE);
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = 0;
            let b = (encE >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // Now the opcode(s). These include any other prefixes the caller
            // hands to us.
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // Now the mod/rm and associated immediates. This is
            // significantly complicated due to the multiple special cases.
            if *simm32 == 0
                && encE != regs::ENC_RSP
                && encE != regs::ENC_RBP
                && encE != regs::ENC_R12
                && encE != regs::ENC_R13
            {
                // FIXME JRS 2020Feb11: those four tests can surely be
                // replaced by a single mask-and-compare check. We should do
                // that because this routine is likely to be hot.
                sink.put1(mkModRegRM(0, encG & 7, encE & 7));
            } else if *simm32 == 0 && (encE == regs::ENC_RSP || encE == regs::ENC_R12) {
                sink.put1(mkModRegRM(0, encG & 7, 4));
                sink.put1(0x24);
            } else if low8willSXto32(*simm32) && encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(1, encG & 7, encE & 7));
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE != regs::ENC_RSP && encE != regs::ENC_R12 {
                sink.put1(mkModRegRM(2, encG & 7, encE & 7));
                sink.put4(*simm32);
            } else if (encE == regs::ENC_RSP || encE == regs::ENC_R12) && low8willSXto32(*simm32) {
                // REX.B distinguishes RSP from R12
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(0x24);
                sink.put1((simm32 & 0xFF) as u8);
            } else if encE == regs::ENC_R12 || encE == regs::ENC_RSP {
                //.. wait for test case for RSP case
                // REX.B distinguishes RSP from R12
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(0x24);
                sink.put4(*simm32);
            } else {
                unreachable!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IR");
            }
        }
        // Bizarrely, the IRRS case is much simpler.
        Addr::IRRS {
            simm32,
            base: regBase,
            index: regIndex,
            shift,
        } => {
            let encBase = iregEnc(*regBase);
            let encIndex = iregEnc(*regIndex);
            // The rex byte
            let w = if clearRexW { 0 } else { 1 };
            let r = (encG >> 3) & 1;
            let x = (encIndex >> 3) & 1;
            let b = (encBase >> 3) & 1;
            let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
            if rex != 0x40 || retainRedundant {
                sink.put1(rex);
            }
            // All other prefixes and opcodes
            while numOpcodes > 0 {
                numOpcodes -= 1;
                sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
            }
            // modrm, SIB, immediates
            if low8willSXto32(*simm32) && encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(1, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put1(*simm32 as u8);
            } else if encIndex != regs::ENC_RSP {
                sink.put1(mkModRegRM(2, encG & 7, 4));
                sink.put1(mkSIB(*shift, encIndex & 7, encBase & 7));
                sink.put4(*simm32);
            } else {
                panic!("emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE: IRRS");
            }
        }
    }
}
/// This is the core 'emit' function for instructions that do not reference
/// memory.
///
/// This is conceptually the same as
/// emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE, except it is for the case
/// where the E operand is a register rather than memory. Hence it is much
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    mut numOpcodes: usize,
    encG: u8,
    encE: u8,
    flags: u32,
) {
    // EncG and EncE can be derived from registers of any class, and they
    // don't even have to be from the same class. For example, for an
    // integer-to-FP conversion insn, one might be RegClass::I64 and the other
    // RegClass::V128.
    let prefix66 = (flags & F_PREFIX_66) != 0;
    let clearRexW = (flags & F_CLEAR_REX_W) != 0;
    let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
    // The operand-size override
    if prefix66 {
        sink.put1(0x66);
    }
    // The rex byte
    let w = if clearRexW { 0 } else { 1 };
    let r = (encG >> 3) & 1;
    let x = 0;
    let b = (encE >> 3) & 1;
    let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
    if rex != 0x40 || retainRedundant {
        sink.put1(rex);
    }
    // All other prefixes and opcodes
    while numOpcodes > 0 {
        numOpcodes -= 1;
        sink.put1(((opcodes >> (numOpcodes << 3)) & 0xFF) as u8);
    }
    // Now the mod/rm byte. The instruction we're generating doesn't access
    // memory, so there is no SIB byte or immediate -- we're done.
    sink.put1(mkModRegRM(3, encG & 7, encE & 7));
}
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.

fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    memE: &Addr,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
    let encG = iregEnc(regG);
    emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
}

fn emit_REX_OPCODES_MODRM_regG_regE(
    sink: &mut MachBuffer<Inst>,
    opcodes: u32,
    numOpcodes: usize,
    regG: Reg,
    regE: Reg,
    flags: u32,
) {
    // JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
    let encG = iregEnc(regG);
    let encE = iregEnc(regE);
    emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
}
/// Write a suitable number of bytes of `simm32` to the sink, as determined by `size`.
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
    match size {
        // Even an 8-byte operand takes at most a 4-byte immediate here; the
        // processor sign-extends it to 64 bits.
        8 | 4 => sink.put4(simm32),
        2 => sink.put2(simm32 as u16),
        1 => sink.put1(simm32 as u8),
        _ => panic!("x64::Inst::emit_simm: unreachable"),
    }
}
/// The top-level emit function.
///
/// Important! Do not add improved (shortened) encoding cases to existing
/// instructions without also adding tests for those improved encodings. That
/// is a dangerous game that leads to hard-to-track-down errors in the emitted
/// code.
///
/// For all instructions, make sure to have test coverage for all of the
/// following situations. Do this by creating the cross product resulting from
/// applying the following rules to each operand:
///
/// (1) for any insn that mentions a register: one test using a register from
///     the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
///     using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
///     This helps detect incorrect REX prefix construction.
///
/// (2) for any insn that mentions a byte register: one test for each of the
///     four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
///     [r8b .. r11b] and [r12b .. r15b]. This checks that
///     apparently-redundant REX prefixes are retained when required.
///
/// (3) for any insn that contains an immediate field, check the following
///     cases: field is zero, field is in simm8 range (-128 .. 127), field is
///     in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
///     instructions that require a 32-bit immediate have a short-form encoding
///     when the imm is in simm8 range.
///
/// Rules (1), (2) and (3) don't apply to registers within address expressions
/// (`Addr`s). Those are already pretty well tested, and the registers in them
/// don't have any effect on the containing instruction (apart from possibly
/// requiring REX prefix bits).
///
/// When choosing registers for a test, avoid using registers with the same
/// offset within a given group. For example, don't use rax and r8, since they
/// both have the lowest 3 bits as 000, and so the test won't detect errors
/// where those 3-bit register sub-fields are confused by the emitter. Instead
/// use (e.g.) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (e.g.) cl
/// and bpl, since they have the same offset in their group; use instead (e.g.) cl
/// and sil.
///
/// For all instructions, also add a test that uses only low-half registers
/// (rax .. rdi, xmm0 .. xmm7) etc., so as to check that any redundant REX
/// prefixes are correctly omitted. This low-half restriction must apply to
/// _all_ registers in the insn, even those in address expressions.
///
/// Following these rules creates a large number of test cases, but it's the
/// only way to make the emitter reliable.
///
/// Known possible improvements:
///
/// * there's a shorter encoding for shl/shr/sar with a shift amount of 1. (Do we
///   care?)
pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
||||
match inst {
|
||||
Inst::Nop { len: 0 } => {}
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
if *op == RMI_R_Op::Mul {
|
||||
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
|
||||
// we have to special-case it.
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
*regE,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FAF,
|
||||
2,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x6B } else { 0x69 };
|
||||
// Yes, really, regG twice.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (opcode_R, opcode_M, subopcode_I) = match op {
|
||||
RMI_R_Op::Add => (0x01, 0x03, 0),
|
||||
RMI_R_Op::Sub => (0x29, 0x2B, 5),
|
||||
RMI_R_Op::And => (0x21, 0x23, 4),
|
||||
RMI_R_Op::Or => (0x09, 0x0B, 1),
|
||||
RMI_R_Op::Xor => (0x31, 0x33, 6),
|
||||
RMI_R_Op::Mul => panic!("unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
// Note. The arguments .. regE .. regG .. sequence
|
||||
// here is the opposite of what is expected. I'm not
|
||||
// sure why this is. But I am fairly sure that the
|
||||
// arg order could be switched back to the expected
|
||||
// .. regG .. regE .. if opcode_rr is also switched
|
||||
// over to the "other" basic integer opcode (viz, the
|
||||
// R/RM vs RM/R duality). However, that would mean
|
||||
// that the test results won't be in accordance with
|
||||
// the GNU as reference output. In other words, the
|
||||
// inversion exists as a result of using GNU as as a
|
||||
// gold standard.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(
|
||||
sink,
|
||||
opcode_R,
|
||||
1,
|
||||
*regE,
|
||||
regG.to_reg(),
|
||||
flags,
|
||||
);
|
||||
// NB: if this is ever extended to handle byte size
|
||||
// ops, be sure to retain redundant REX prefixes.
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
opcode_M,
|
||||
1,
|
||||
regG.to_reg(),
|
||||
addr,
|
||||
flags,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if useImm8 { 0x83 } else { 0x81 };
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(regG.to_reg());
|
||||
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
|
||||
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if *dst_is_64 {
|
||||
// FIXME JRS 2020Feb10: also use the 32-bit case here when
|
||||
// possible
|
||||
sink.put1(0x48 | ((encDst >> 3) & 1));
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put8(*simm64);
|
||||
} else {
|
||||
if ((encDst >> 3) & 1) == 1 {
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0xB8 | (encDst & 7));
|
||||
sink.put4(*simm64 as u32);
|
||||
}
|
||||
}
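// A note added for readability (not in the original source): the 64-bit path above is
// the REX.W + B8+rd imm64 form (printed as `movabsq` by the code further below), while
// the 32-bit path is the plain B8+rd imm32 form (`movl`), with an optional 0x41 REX.B
// byte when the destination is one of r8..r15.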
|
||||
Inst::Mov_R_R { is_64, src, dst } => {
|
||||
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
|
||||
}
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVZBL is (REX.W==0) 0F B6 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVZBQ is (REX.W==1) 0F B6 /r
|
||||
// I'm not sure why the Intel manual offers different
|
||||
// encodings for MOVZBQ than for MOVZBL. AIUI they should
|
||||
// achieve the same, since MOVZBL is just going to zero out
|
||||
// the upper half of the destination anyway.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB6,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVZWL is (REX.W==0) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVZWQ is (REX.W==1) 0F B7 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FB7,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// This is just a standard 32 bit load, and we rely on the
|
||||
// default zero-extension rule to perform the extension.
|
||||
// MOV r/m32, r32 is (REX.W==0) 8B /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x8B,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
|
||||
}
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => {
|
||||
match extMode {
|
||||
ExtMode::BL => {
|
||||
// MOVSBL is (REX.W==0) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::BQ => {
|
||||
// MOVSBQ is (REX.W==1) 0F BE /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBE,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::WL => {
|
||||
// MOVSWL is (REX.W==0) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
ExtMode::WQ => {
|
||||
// MOVSWQ is (REX.W==1) 0F BF /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x0FBF,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
ExtMode::LQ => {
|
||||
// MOVSLQ is (REX.W==1) 63 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x63,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
F_NONE,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_M { size, src, addr } => {
|
||||
match size {
|
||||
1 => {
|
||||
// This is one of the few places where the presence of a
|
||||
// redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encSrc = iregEnc(*src);
|
||||
let retainRedundantRex = if encSrc >= 4 && encSrc <= 7 {
|
||||
F_RETAIN_REDUNDANT_REX
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x88,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | retainRedundantRex,
|
||||
)
|
||||
}
|
||||
2 => {
|
||||
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W | F_PREFIX_66,
|
||||
)
|
||||
}
|
||||
4 => {
|
||||
// MOV r32, r/m32 is (REX.W==0) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
|
||||
sink,
|
||||
0x89,
|
||||
1,
|
||||
*src,
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
)
|
||||
}
|
||||
8 => {
|
||||
// MOV r64, r/m64 is (REX.W==1) 89 /r
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
|
||||
}
|
||||
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
|
||||
}
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
let subopcode = match kind {
|
||||
ShiftKind::Left => 4,
|
||||
ShiftKind::RightZ => 5,
|
||||
ShiftKind::RightS => 7,
|
||||
};
|
||||
match num_bits {
|
||||
None => {
|
||||
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
|
||||
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xD3,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
}
|
||||
Some(num_bits) => {
|
||||
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
|
||||
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
|
||||
// When the shift amount is 1, there's an even shorter encoding, but we don't
|
||||
// bother with that nicety here.
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xC1,
|
||||
1,
|
||||
subopcode,
|
||||
encDst,
|
||||
if *is_64 { F_NONE } else { F_CLEAR_REX_W },
|
||||
);
|
||||
sink.put1(*num_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size,
|
||||
src: srcE,
|
||||
dst: regG,
|
||||
} => {
|
||||
let mut retainRedundantRex = 0;
|
||||
if *size == 1 {
|
||||
// Here, a redundant REX prefix changes the meaning of the
|
||||
// instruction.
|
||||
let encG = iregEnc(*regG);
|
||||
if encG >= 4 && encG <= 7 {
|
||||
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
let mut flags = match size {
|
||||
8 => F_NONE,
|
||||
4 => F_CLEAR_REX_W,
|
||||
2 => F_CLEAR_REX_W | F_PREFIX_66,
|
||||
1 => F_CLEAR_REX_W | retainRedundantRex,
|
||||
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
|
||||
};
|
||||
match srcE {
|
||||
RMI::R { reg: regE } => {
|
||||
let opcode = if *size == 1 { 0x38 } else { 0x39 };
|
||||
if *size == 1 {
|
||||
// We also need to check whether the E register forces
|
||||
// the use of a redundant REX.
|
||||
let encE = iregEnc(*regE);
|
||||
if encE >= 4 && encE <= 7 {
|
||||
flags |= F_RETAIN_REDUNDANT_REX;
|
||||
}
|
||||
}
|
||||
// Same comment re swapped args as for Alu_RMI_R.
|
||||
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
// FIXME JRS 2020Feb11: there are shorter encodings for
|
||||
// cmp $imm, rax/eax/ax/al.
|
||||
let useImm8 = low8willSXto32(*simm32);
|
||||
let opcode = if *size == 1 {
|
||||
0x80
|
||||
} else if useImm8 {
|
||||
0x83
|
||||
} else {
|
||||
0x81
|
||||
};
|
||||
// And also here we use the "normal" G-E ordering.
|
||||
let encG = iregEnc(*regG);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink, opcode, 1, 7, /*subopcode*/
|
||||
encG, flags,
|
||||
);
|
||||
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
match src {
|
||||
RMI::R { reg } => {
|
||||
let encReg = iregEnc(*reg);
|
||||
let rex = 0x40 | ((encReg >> 3) & 1);
|
||||
if rex != 0x40 {
|
||||
sink.put1(rex);
|
||||
}
|
||||
sink.put1(0x50 | (encReg & 7));
|
||||
}
|
||||
RMI::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
6, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RMI::I { simm32 } => {
|
||||
if low8willSXto64(*simm32) {
|
||||
sink.put1(0x6A);
|
||||
sink.put1(*simm32 as u8);
|
||||
} else {
|
||||
sink.put1(0x68);
|
||||
sink.put4(*simm32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
let encDst = iregEnc(dst.to_reg());
|
||||
if encDst >= 8 {
|
||||
// 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant
|
||||
// here.
|
||||
sink.put1(0x41);
|
||||
}
|
||||
sink.put1(0x58 + (encDst & 7));
|
||||
}
|
||||
//
|
||||
// ** Inst::CallKnown
|
||||
//
|
||||
Inst::CallUnknown { dest } => {
|
||||
match dest {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
2, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::Ret {} => sink.put1(0xC3),
|
||||
|
||||
Inst::JmpKnown { dest } => {
|
||||
let disp = dest.as_offset32_or_zero() - 5;
|
||||
let disp = disp as u32;
|
||||
let br_start = sink.cur_offset();
|
||||
let br_disp_off = br_start + 1;
|
||||
let br_end = br_start + 5;
|
||||
if let Some(l) = dest.as_label() {
|
||||
sink.use_label_at_offset(br_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(br_start, br_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(disp);
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => {
|
||||
// Conditional part.
|
||||
|
||||
// This insn is 6 bytes long. Currently `offset` is relative to
|
||||
// the start of this insn, but the Intel encoding requires it to
|
||||
// be relative to the start of the next instruction. Hence the
|
||||
// adjustment.
|
||||
let taken_disp = taken.as_offset32_or_zero() - 6;
|
||||
let taken_disp = taken_disp as u32;
|
||||
let cond_start = sink.cur_offset();
|
||||
let cond_disp_off = cond_start + 2;
|
||||
let cond_end = cond_start + 6;
|
||||
if let Some(l) = taken.as_label() {
|
||||
sink.use_label_at_offset(cond_disp_off, l, LabelUse::Rel32);
|
||||
let inverted: [u8; 6] =
|
||||
[0x0F, 0x80 + (cc.invert().get_enc()), 0xFA, 0xFF, 0xFF, 0xFF];
|
||||
sink.add_cond_branch(cond_start, cond_end, l, &inverted[..]);
|
||||
}
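// A reading note added here (not in the original source): the `inverted` byte sequence
// appears to be the same Jcc instruction with the condition flipped and a disp32 of
// 0xFFFF_FFFA (-6), i.e. a 6-byte conditional jump back to its own start, which
// MachBuffer can substitute when it flips the sense of this branch during its
// branch-simplification pass.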
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0x80 + cc.get_enc());
|
||||
sink.put4(taken_disp);
|
||||
|
||||
// Unconditional part.
|
||||
|
||||
let nt_disp = not_taken.as_offset32_or_zero() - 5;
|
||||
let nt_disp = nt_disp as u32;
|
||||
let uncond_start = sink.cur_offset();
|
||||
let uncond_disp_off = uncond_start + 1;
|
||||
let uncond_end = uncond_start + 5;
|
||||
if let Some(l) = not_taken.as_label() {
|
||||
sink.use_label_at_offset(uncond_disp_off, l, LabelUse::Rel32);
|
||||
sink.add_uncond_branch(uncond_start, uncond_end, l);
|
||||
}
|
||||
sink.put1(0xE9);
|
||||
sink.put4(nt_disp);
|
||||
}
|
||||
Inst::JmpUnknown { target } => {
|
||||
match target {
|
||||
RM::R { reg } => {
|
||||
let regEnc = iregEnc(*reg);
|
||||
emit_REX_OPCODES_MODRM_encG_encE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
regEnc,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
RM::M { addr } => {
|
||||
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
|
||||
sink,
|
||||
0xFF,
|
||||
1,
|
||||
4, /*subopcode*/
|
||||
addr,
|
||||
F_CLEAR_REX_W,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
|
||||
}
|
||||
}
|
The diff for this file is not shown in full because of its size.
@ -1,905 +0,0 @@
//! This module defines x86_64-specific machine instruction types.

#![allow(dead_code)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

use core::convert::TryFrom;
use smallvec::SmallVec;
use std::fmt;
use std::string::{String, ToString};

use regalloc::RegUsageCollector;
use regalloc::Set;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable};

use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::ExternalName;
use crate::ir::Type;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{settings, CodegenError, CodegenResult};

pub mod args;
mod emit;
#[cfg(test)]
mod emit_tests;
pub mod regs;

use args::*;
use regs::{create_reg_universe_systemv, show_ireg_sized};
|
||||
|
||||
//=============================================================================
|
||||
// Instructions (top level): definition
|
||||
|
||||
// Don't build these directly. Instead use the Inst:: functions to create them.
|
||||
|
||||
/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum Inst {
|
||||
/// nops of various sizes, including zero
|
||||
Nop { len: u8 },
|
||||
|
||||
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
|
||||
Alu_RMI_R {
|
||||
is_64: bool,
|
||||
op: RMI_R_Op,
|
||||
src: RMI,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// (imm32 imm64) reg.
|
||||
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32
|
||||
Imm_R {
|
||||
dst_is_64: bool,
|
||||
simm64: u64,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (64 32) reg reg
|
||||
Mov_R_R {
|
||||
is_64: bool,
|
||||
src: Reg,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// movz (bl bq wl wq lq) addr reg (good for all ZX loads except 64->64).
|
||||
/// Note that the lq variant doesn't really exist since the default
|
||||
/// zero-extend rule makes it unnecessary. For that case we emit the
|
||||
/// equivalent "movl AM, reg32".
|
||||
MovZX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// A plain 64-bit integer load, since MovZX_M_R can't represent that
|
||||
Mov64_M_R { addr: Addr, dst: Writable<Reg> },
|
||||
|
||||
/// movs (bl bq wl wq lq) addr reg (good for all SX loads)
|
||||
MovSX_M_R {
|
||||
extMode: ExtMode,
|
||||
addr: Addr,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// mov (b w l q) reg addr (good for all integer stores)
|
||||
Mov_R_M {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
},
|
||||
|
||||
/// (shl shr sar) (l q) imm reg
|
||||
Shift_R {
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// cmp (b w l q) (reg addr imm) reg
|
||||
Cmp_RMI_R {
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
},
|
||||
|
||||
/// pushq (reg addr imm)
|
||||
Push64 { src: RMI },
|
||||
|
||||
/// popq reg
|
||||
Pop64 { dst: Writable<Reg> },
|
||||
|
||||
/// call simm32
|
||||
CallKnown {
|
||||
dest: ExternalName,
|
||||
uses: Set<Reg>,
|
||||
defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
/// callq (reg mem)
|
||||
CallUnknown {
|
||||
dest: RM,
|
||||
//uses: Set<Reg>,
|
||||
//defs: Set<Writable<Reg>>,
|
||||
},
|
||||
|
||||
// ---- branches (exactly one must appear at end of BB) ----
|
||||
/// ret
|
||||
Ret,
|
||||
|
||||
/// A placeholder instruction, generating no code, meaning that a function epilogue must be
|
||||
/// inserted there.
|
||||
EpiloguePlaceholder,
|
||||
|
||||
/// jmp simm32
|
||||
JmpKnown { dest: BranchTarget },
|
||||
|
||||
/// jcond cond target target
|
||||
/// Symmetrical two-way conditional branch.
|
||||
/// Emitted as a compound sequence; the MachBuffer will shrink it
|
||||
/// as appropriate.
|
||||
JmpCondSymm {
|
||||
cc: CC,
|
||||
taken: BranchTarget,
|
||||
not_taken: BranchTarget,
|
||||
},
|
||||
|
||||
/// jmpq (reg mem)
|
||||
JmpUnknown { target: RM },
|
||||
}
|
||||
|
||||
// Handy constructors for Insts.

// For various sizes, will some number of lowest bits sign extend to be the
// same as the whole value?
pub(crate) fn low32willSXto64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}
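// Informal examples added for clarity (not in the original source):
//   low32willSXto64(0x0000_0000_7FFF_FFFF) == true   // already a sign-extended i32
//   low32willSXto64(0xFFFF_FFFF_8000_0000) == true   // the sign-extension of i32::MIN
//   low32willSXto64(0x0000_0001_0000_0000) == false  // bit 32 is set, low 32 bits are not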
|
||||
|
||||
impl Inst {
|
||||
pub(crate) fn nop(len: u8) -> Self {
|
||||
debug_assert!(len <= 16);
|
||||
Self::Nop { len }
|
||||
}
|
||||
|
||||
pub(crate) fn alu_rmi_r(is_64: bool, op: RMI_R_Op, src: RMI, dst: Writable<Reg>) -> Self {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Self::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
if !dst_is_64 {
|
||||
debug_assert!(low32willSXto64(simm64));
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov_R_R { is_64, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovZX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov64_m_r(addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Mov64_M_R { addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn movsx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::MovSX_M_R { extMode, addr, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn mov_r_m(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: Reg,
|
||||
addr: Addr,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(src.get_class() == RegClass::I64);
|
||||
Inst::Mov_R_M { size, src, addr }
|
||||
}
|
||||
|
||||
pub(crate) fn shift_r(
|
||||
is_64: bool,
|
||||
kind: ShiftKind,
|
||||
num_bits: Option<u8>,
|
||||
dst: Writable<Reg>,
|
||||
) -> Inst {
|
||||
debug_assert!(if let Some(num_bits) = num_bits {
|
||||
num_bits < if is_64 { 64 } else { 32 }
|
||||
} else {
|
||||
true
|
||||
});
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn cmp_rmi_r(
|
||||
size: u8, // 1, 2, 4 or 8
|
||||
src: RMI,
|
||||
dst: Reg,
|
||||
) -> Inst {
|
||||
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
|
||||
debug_assert!(dst.get_class() == RegClass::I64);
|
||||
Inst::Cmp_RMI_R { size, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn push64(src: RMI) -> Inst {
|
||||
Inst::Push64 { src }
|
||||
}
|
||||
|
||||
pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
|
||||
Inst::Pop64 { dst }
|
||||
}
|
||||
|
||||
pub(crate) fn call_unknown(dest: RM) -> Inst {
|
||||
Inst::CallUnknown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn ret() -> Inst {
|
||||
Inst::Ret
|
||||
}
|
||||
|
||||
pub(crate) fn epilogue_placeholder() -> Inst {
|
||||
Inst::EpiloguePlaceholder
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_known(dest: BranchTarget) -> Inst {
|
||||
Inst::JmpKnown { dest }
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_cond_symm(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst {
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn jmp_unknown(target: RM) -> Inst {
|
||||
Inst::JmpUnknown { target }
|
||||
}
|
||||
}
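// Illustrative only (not part of the original source): a couple of instructions built
// with the constructors above, using register helpers from the `regs` module shown below.
//   let add = Inst::alu_rmi_r(true, RMI_R_Op::Add,
//                             RMI::R { reg: regs::rcx() },
//                             Writable::from_reg(regs::rax()));
//   let ret = Inst::ret();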
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: printing
|
||||
|
||||
impl ShowWithRRU for Inst {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
fn ljustify(s: String) -> String {
|
||||
let w = 7;
|
||||
if s.len() >= w {
|
||||
s
|
||||
} else {
|
||||
let need = usize::min(w, w - s.len());
|
||||
s + &format!("{nil: <width$}", nil = "", width = need)
|
||||
}
|
||||
}
|
||||
|
||||
fn ljustify2(s1: String, s2: String) -> String {
|
||||
ljustify(s1 + &s2)
|
||||
}
|
||||
|
||||
fn suffixLQ(is_64: bool) -> String {
|
||||
(if is_64 { "q" } else { "l" }).to_string()
|
||||
}
|
||||
|
||||
fn sizeLQ(is_64: bool) -> u8 {
|
||||
if is_64 {
|
||||
8
|
||||
} else {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
fn suffixBWLQ(size: u8) -> String {
|
||||
match size {
|
||||
1 => "b".to_string(),
|
||||
2 => "w".to_string(),
|
||||
4 => "l".to_string(),
|
||||
8 => "q".to_string(),
|
||||
_ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
|
||||
}
|
||||
}
|
||||
|
||||
match self {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
Inst::Alu_RMI_R {
|
||||
is_64,
|
||||
op,
|
||||
src,
|
||||
dst,
|
||||
} => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2(op.to_string(), suffixLQ(*is_64)),
|
||||
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
|
||||
),
|
||||
Inst::Imm_R {
|
||||
dst_is_64,
|
||||
simm64,
|
||||
dst,
|
||||
} => {
|
||||
if *dst_is_64 {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movabsq".to_string()),
|
||||
*simm64 as i64,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} ${}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
(*simm64 as u32) as i32,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov_R_R { is_64, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
Inst::MovZX_M_R { extMode, addr, dst } => {
|
||||
if *extMode == ExtMode::LQ {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movl".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 4)
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movz".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
)
|
||||
}
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify("movq".to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
dst.show_rru(mb_rru)
|
||||
),
|
||||
Inst::MovSX_M_R { extMode, addr, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("movs".to_string(), extMode.to_string()),
|
||||
addr.show_rru(mb_rru),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, extMode.dst_size())
|
||||
),
|
||||
Inst::Mov_R_M { size, src, addr } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("mov".to_string(), suffixBWLQ(*size)),
|
||||
show_ireg_sized(*src, mb_rru, *size),
|
||||
addr.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Shift_R {
|
||||
is_64,
|
||||
kind,
|
||||
num_bits,
|
||||
dst,
|
||||
} => match num_bits {
|
||||
None => format!(
|
||||
"{} %cl, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
|
||||
Some(num_bits) => format!(
|
||||
"{} ${}, {}",
|
||||
ljustify2(kind.to_string(), suffixLQ(*is_64)),
|
||||
num_bits,
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
|
||||
),
|
||||
},
|
||||
Inst::Cmp_RMI_R { size, src, dst } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("cmp".to_string(), suffixBWLQ(*size)),
|
||||
src.show_rru_sized(mb_rru, *size),
|
||||
show_ireg_sized(*dst, mb_rru, *size)
|
||||
),
|
||||
Inst::Push64 { src } => {
|
||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
|
||||
}
|
||||
//Inst::CallKnown { target } => format!("{} {:?}", ljustify("call".to_string()), target),
|
||||
Inst::CallKnown { .. } => "**CallKnown**".to_string(),
|
||||
Inst::CallUnknown { dest } => format!(
|
||||
"{} *{}",
|
||||
ljustify("call".to_string()),
|
||||
dest.show_rru(mb_rru)
|
||||
),
|
||||
Inst::Ret => "ret".to_string(),
|
||||
Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
|
||||
Inst::JmpKnown { dest } => {
|
||||
format!("{} {}", ljustify("jmp".to_string()), dest.show_rru(mb_rru))
|
||||
}
|
||||
Inst::JmpCondSymm {
|
||||
cc,
|
||||
taken,
|
||||
not_taken,
|
||||
} => format!(
|
||||
"{} taken={} not_taken={}",
|
||||
ljustify2("j".to_string(), cc.to_string()),
|
||||
taken.show_rru(mb_rru),
|
||||
not_taken.show_rru(mb_rru)
|
||||
),
|
||||
//
|
||||
Inst::JmpUnknown { target } => format!(
|
||||
"{} *{}",
|
||||
ljustify("jmp".to_string()),
|
||||
target.show_rru(mb_rru)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Temp hook for legacy printing machinery
|
||||
impl fmt::Debug for Inst {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
// Print the insn without a Universe :-(
|
||||
write!(fmt, "{}", self.show_rru(None))
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// This is a bit subtle. If some register is in the modified set, then it may not be in either
|
||||
// the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
|
||||
// regalloc.rs will "fix" this for us by removing the the modified set from the use and def
|
||||
// sets.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
src,
|
||||
dst,
|
||||
} => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
dst,
|
||||
} => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_R { is_64: _, src, dst } => {
|
||||
collector.add_use(*src);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
addr,
|
||||
dst,
|
||||
} => {
|
||||
addr.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::Mov_R_M { size: _, src, addr } => {
|
||||
collector.add_use(*src);
|
||||
addr.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits,
|
||||
dst,
|
||||
} => {
|
||||
if num_bits.is_none() {
|
||||
collector.add_use(regs::rcx());
|
||||
}
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R { size: _, src, dst } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_use(*dst); // yes, really `add_use`
|
||||
}
|
||||
Inst::Push64 { src } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(Writable::from_reg(regs::rsp()));
|
||||
}
|
||||
Inst::Pop64 { dst } => {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {
|
||||
// FIXME add arg regs (iru.used) and caller-saved regs (iru.defined)
|
||||
unimplemented!();
|
||||
}
|
||||
Inst::CallUnknown { dest } => {
|
||||
dest.get_regs_as_uses(collector);
|
||||
}
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.get_regs_as_uses(collector);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_get_regs inst"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: map_regs
|
||||
|
||||
fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
|
||||
if r.is_virtual() {
|
||||
let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
|
||||
*r = new;
|
||||
}
|
||||
}
|
||||
|
||||
fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
|
||||
if r.to_reg().is_virtual() {
|
||||
let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
|
||||
*r = Writable::from_reg(new);
|
||||
}
|
||||
}
|
||||
|
||||
impl Addr {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
Addr::IR {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
} => map_use(map, base),
|
||||
Addr::IRRS {
|
||||
simm32: _,
|
||||
ref mut base,
|
||||
ref mut index,
|
||||
shift: _,
|
||||
} => {
|
||||
map_use(map, base);
|
||||
map_use(map, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RMI {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RMI::R { ref mut reg } => map_use(map, reg),
|
||||
RMI::M { ref mut addr } => addr.map_uses(map),
|
||||
RMI::I { simm32: _ } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RM {
|
||||
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
|
||||
match self {
|
||||
RM::R { ref mut reg } => map_use(map, reg),
|
||||
RM::M { ref mut addr } => addr.map_uses(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
// Note this must be carefully synchronized with x64_get_regs.
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
is_64: _,
|
||||
op: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Imm_R {
|
||||
dst_is_64: _,
|
||||
simm64: _,
|
||||
ref mut dst,
|
||||
} => map_def(mapper, dst),
|
||||
Inst::Mov_R_R {
|
||||
is_64: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovZX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov64_M_R { addr, dst } => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::MovSX_M_R {
|
||||
extMode: _,
|
||||
ref mut addr,
|
||||
ref mut dst,
|
||||
} => {
|
||||
addr.map_uses(mapper);
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::Mov_R_M {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut addr,
|
||||
} => {
|
||||
map_use(mapper, src);
|
||||
addr.map_uses(mapper);
|
||||
}
|
||||
Inst::Shift_R {
|
||||
is_64: _,
|
||||
kind: _,
|
||||
num_bits: _,
|
||||
ref mut dst,
|
||||
} => {
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
Inst::Cmp_RMI_R {
|
||||
size: _,
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
} => {
|
||||
src.map_uses(mapper);
|
||||
map_use(mapper, dst);
|
||||
}
|
||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||
Inst::Pop64 { ref mut dst } => {
|
||||
map_def(mapper, dst);
|
||||
}
|
||||
Inst::CallKnown {
|
||||
dest: _,
|
||||
uses: _,
|
||||
defs: _,
|
||||
} => {}
|
||||
Inst::CallUnknown { dest } => dest.map_uses(mapper),
|
||||
Inst::Ret => {}
|
||||
Inst::EpiloguePlaceholder => {}
|
||||
Inst::JmpKnown { dest: _ } => {}
|
||||
Inst::JmpCondSymm {
|
||||
cc: _,
|
||||
taken: _,
|
||||
not_taken: _,
|
||||
} => {}
|
||||
//Inst::JmpUnknown { target } => {
|
||||
// target.apply_map(mapper);
|
||||
//}
|
||||
Inst::Nop { .. } | Inst::JmpUnknown { .. } => unimplemented!("x64_map_regs opcode"),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: misc functions and external interface
|
||||
|
||||
impl MachInst for Inst {
|
||||
fn get_regs(&self, collector: &mut RegUsageCollector) {
|
||||
x64_get_regs(&self, collector)
|
||||
}
|
||||
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||
x64_map_regs(self, mapper);
|
||||
}
|
||||
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
|
||||
// Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
|
||||
// out the upper 32 bits of the destination. For example, we could
|
||||
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
|
||||
// %reg.
|
||||
match self {
|
||||
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_epilogue_placeholder(&self) -> bool {
|
||||
if let Self::EpiloguePlaceholder = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
|
||||
match self {
|
||||
// Interesting cases.
|
||||
&Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
|
||||
&Self::JmpKnown { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
|
||||
&Self::JmpCondSymm {
|
||||
cc: _,
|
||||
taken,
|
||||
not_taken,
|
||||
} => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
|
||||
// All other cases are boring.
|
||||
_ => MachTerminator::None,
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, _ty: Type) -> Inst {
|
||||
let rc_dst = dst_reg.to_reg().get_class();
|
||||
let rc_src = src_reg.get_class();
|
||||
// If this isn't true, we have gone way off the rails.
|
||||
debug_assert!(rc_dst == rc_src);
|
||||
match rc_dst {
|
||||
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
|
||||
_ => panic!("gen_move(x64): unhandled regclass"),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_zero_len_nop() -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn gen_nop(_preferred_size: usize) -> Inst {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
|
||||
None
|
||||
}
|
||||
|
||||
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
|
||||
match ty {
|
||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
ty
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_jump(label: MachLabel) -> Inst {
|
||||
Inst::jmp_known(BranchTarget::Label(label))
|
||||
}
|
||||
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, _: Type) -> SmallVec<[Self; 4]> {
|
||||
let mut ret = SmallVec::new();
|
||||
let is64 = value > 0xffff_ffff;
|
||||
ret.push(Inst::imm_r(is64, value, to_reg));
|
||||
ret
|
||||
}
|
||||
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse {
|
||||
create_reg_universe_systemv(flags)
|
||||
}
|
||||
|
||||
fn worst_case_size() -> CodeOffset {
|
||||
15
|
||||
}
|
||||
|
||||
type LabelUse = LabelUse;
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
type State = ();
|
||||
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>, _flags: &settings::Flags, _: &mut Self::State) {
|
||||
emit::emit(self, sink);
|
||||
}
|
||||
}
|
||||
|
||||
/// A label-use (internal relocation) in generated code.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum LabelUse {
|
||||
/// A 32-bit offset from location of relocation itself, added to the
|
||||
/// existing value at that location.
|
||||
Rel32,
|
||||
}
|
||||
|
||||
impl MachInstLabelUse for LabelUse {
|
||||
const ALIGN: CodeOffset = 1;
|
||||
|
||||
fn max_pos_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x7fff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
fn max_neg_range(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0x8000_0000,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 4,
|
||||
}
|
||||
}
|
||||
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
let addend = i32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
|
||||
let value = i32::try_from(label_offset)
|
||||
.unwrap()
|
||||
.wrapping_sub(i32::try_from(use_offset).unwrap())
|
||||
.wrapping_add(addend);
|
||||
buffer.copy_from_slice(&value.to_le_bytes()[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn supports_veneer(self) -> bool {
|
||||
match self {
|
||||
LabelUse::Rel32 => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn veneer_size(self) -> CodeOffset {
|
||||
match self {
|
||||
LabelUse::Rel32 => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
|
||||
match self {
|
||||
LabelUse::Rel32 => {
|
||||
panic!("Veneer not supported for Rel32 label-use.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,261 +0,0 @@
|
|||
//! Registers, the Universe thereof, and printing.
//!
//! These are ordered by sequence number, as required in the Universe. The strange ordering is
//! intended to make callee-save registers available before caller-saved ones. This is a net win
//! provided that each function makes at least one onward call. It'll be a net loss for leaf
//! functions, and we should change the ordering in that case, so as to make caller-save regs
//! available first.
//!
//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use std::string::String;
|
||||
|
||||
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
|
||||
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::settings;
|
||||
|
||||
// Hardware encodings for a few registers.
|
||||
|
||||
pub const ENC_RBX: u8 = 3;
|
||||
pub const ENC_RSP: u8 = 4;
|
||||
pub const ENC_RBP: u8 = 5;
|
||||
pub const ENC_R12: u8 = 12;
|
||||
pub const ENC_R13: u8 = 13;
|
||||
pub const ENC_R14: u8 = 14;
|
||||
pub const ENC_R15: u8 = 15;
|
||||
|
||||
fn gpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::I64, enc, index)
|
||||
}
|
||||
|
||||
pub(crate) fn r12() -> Reg {
|
||||
gpr(ENC_R12, 0)
|
||||
}
|
||||
pub(crate) fn r13() -> Reg {
|
||||
gpr(ENC_R13, 1)
|
||||
}
|
||||
pub(crate) fn r14() -> Reg {
|
||||
gpr(ENC_R14, 2)
|
||||
}
|
||||
pub(crate) fn r15() -> Reg {
|
||||
gpr(ENC_R15, 3)
|
||||
}
|
||||
pub(crate) fn rbx() -> Reg {
|
||||
gpr(ENC_RBX, 4)
|
||||
}
|
||||
pub(crate) fn rsi() -> Reg {
|
||||
gpr(6, 5)
|
||||
}
|
||||
pub(crate) fn rdi() -> Reg {
|
||||
gpr(7, 6)
|
||||
}
|
||||
pub(crate) fn rax() -> Reg {
|
||||
gpr(0, 7)
|
||||
}
|
||||
pub(crate) fn rcx() -> Reg {
|
||||
gpr(1, 8)
|
||||
}
|
||||
pub(crate) fn rdx() -> Reg {
|
||||
gpr(2, 9)
|
||||
}
|
||||
pub(crate) fn r8() -> Reg {
|
||||
gpr(8, 10)
|
||||
}
|
||||
pub(crate) fn r9() -> Reg {
|
||||
gpr(9, 11)
|
||||
}
|
||||
pub(crate) fn r10() -> Reg {
|
||||
gpr(10, 12)
|
||||
}
|
||||
pub(crate) fn r11() -> Reg {
|
||||
gpr(11, 13)
|
||||
}
|
||||
|
||||
fn fpr(enc: u8, index: u8) -> Reg {
|
||||
Reg::new_real(RegClass::V128, enc, index)
|
||||
}
|
||||
fn xmm0() -> Reg {
|
||||
fpr(0, 14)
|
||||
}
|
||||
fn xmm1() -> Reg {
|
||||
fpr(1, 15)
|
||||
}
|
||||
fn xmm2() -> Reg {
|
||||
fpr(2, 16)
|
||||
}
|
||||
fn xmm3() -> Reg {
|
||||
fpr(3, 17)
|
||||
}
|
||||
fn xmm4() -> Reg {
|
||||
fpr(4, 18)
|
||||
}
|
||||
fn xmm5() -> Reg {
|
||||
fpr(5, 19)
|
||||
}
|
||||
fn xmm6() -> Reg {
|
||||
fpr(6, 20)
|
||||
}
|
||||
fn xmm7() -> Reg {
|
||||
fpr(7, 21)
|
||||
}
|
||||
fn xmm8() -> Reg {
    fpr(8, 22)
}

fn xmm9() -> Reg {
    fpr(9, 23)
}

fn xmm10() -> Reg {
    fpr(10, 24)
}

fn xmm11() -> Reg {
    fpr(11, 25)
}

fn xmm12() -> Reg {
    fpr(12, 26)
}

fn xmm13() -> Reg {
    fpr(13, 27)
}

fn xmm14() -> Reg {
    fpr(14, 28)
}

fn xmm15() -> Reg {
    fpr(15, 29)
}

pub(crate) fn rsp() -> Reg {
    gpr(ENC_RSP, 30)
}

pub(crate) fn rbp() -> Reg {
    gpr(ENC_RBP, 31)
}

/// Create the register universe for X64.
///
/// The ordering of registers matters, as commented in the file doc comment: assumes the
/// calling-convention is SystemV, at the moment.
pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
    let mut regs = Vec::<(RealReg, String)>::new();
    let mut allocable_by_class = [None; NUM_REG_CLASSES];

    // Integer regs.
    let mut base = regs.len();

    // Callee-saved, in the SystemV x86_64 ABI.
    regs.push((r12().to_real_reg(), "%r12".into()));
    regs.push((r13().to_real_reg(), "%r13".into()));
    regs.push((r14().to_real_reg(), "%r14".into()));
    regs.push((r15().to_real_reg(), "%r15".into()));
    regs.push((rbx().to_real_reg(), "%rbx".into()));

    // Caller-saved, in the SystemV x86_64 ABI.
    regs.push((rsi().to_real_reg(), "%rsi".into()));
    regs.push((rdi().to_real_reg(), "%rdi".into()));
    regs.push((rax().to_real_reg(), "%rax".into()));
    regs.push((rcx().to_real_reg(), "%rcx".into()));
    regs.push((rdx().to_real_reg(), "%rdx".into()));
    regs.push((r8().to_real_reg(), "%r8".into()));
    regs.push((r9().to_real_reg(), "%r9".into()));
    regs.push((r10().to_real_reg(), "%r10".into()));
    regs.push((r11().to_real_reg(), "%r11".into()));

    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(r12().get_index()),
    });

    // XMM registers
    base = regs.len();
    regs.push((xmm0().to_real_reg(), "%xmm0".into()));
    regs.push((xmm1().to_real_reg(), "%xmm1".into()));
    regs.push((xmm2().to_real_reg(), "%xmm2".into()));
    regs.push((xmm3().to_real_reg(), "%xmm3".into()));
    regs.push((xmm4().to_real_reg(), "%xmm4".into()));
    regs.push((xmm5().to_real_reg(), "%xmm5".into()));
    regs.push((xmm6().to_real_reg(), "%xmm6".into()));
    regs.push((xmm7().to_real_reg(), "%xmm7".into()));
    regs.push((xmm8().to_real_reg(), "%xmm8".into()));
    regs.push((xmm9().to_real_reg(), "%xmm9".into()));
    regs.push((xmm10().to_real_reg(), "%xmm10".into()));
    regs.push((xmm11().to_real_reg(), "%xmm11".into()));
    regs.push((xmm12().to_real_reg(), "%xmm12".into()));
    regs.push((xmm13().to_real_reg(), "%xmm13".into()));
    regs.push((xmm14().to_real_reg(), "%xmm14".into()));
    regs.push((xmm15().to_real_reg(), "%xmm15".into()));

    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
        first: base,
        last: regs.len() - 1,
        suggested_scratch: Some(xmm15().get_index()),
    });

    // Other regs, not available to the allocator.
    let allocable = regs.len();
    regs.push((rsp().to_real_reg(), "%rsp".into()));
    regs.push((rbp().to_real_reg(), "%rbp".into()));

    RealRegUniverse {
        regs,
        allocable,
        allocable_by_class,
    }
}

/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
/// smaller size (4, 2 or 1 bytes).
pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
    let mut s = reg.show_rru(mb_rru);

    if reg.get_class() != RegClass::I64 || size == 8 {
        // We can't do any better.
        return s;
    }

    if reg.is_real() {
        // Change (eg) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
        // describe diplomatically as "a kludge", but it's only debug code.
        let remapper = match s.as_str() {
            "%rax" => Some(["%eax", "%ax", "%al"]),
            "%rbx" => Some(["%ebx", "%bx", "%bl"]),
            "%rcx" => Some(["%ecx", "%cx", "%cl"]),
            "%rdx" => Some(["%edx", "%dx", "%dl"]),
            "%rsi" => Some(["%esi", "%si", "%sil"]),
            "%rdi" => Some(["%edi", "%di", "%dil"]),
            "%rbp" => Some(["%ebp", "%bp", "%bpl"]),
            "%rsp" => Some(["%esp", "%sp", "%spl"]),
            "%r8" => Some(["%r8d", "%r8w", "%r8b"]),
            "%r9" => Some(["%r9d", "%r9w", "%r9b"]),
            "%r10" => Some(["%r10d", "%r10w", "%r10b"]),
            "%r11" => Some(["%r11d", "%r11w", "%r11b"]),
            "%r12" => Some(["%r12d", "%r12w", "%r12b"]),
            "%r13" => Some(["%r13d", "%r13w", "%r13b"]),
            "%r14" => Some(["%r14d", "%r14w", "%r14b"]),
            "%r15" => Some(["%r15d", "%r15w", "%r15b"]),
            _ => None,
        };
        if let Some(smaller_names) = remapper {
            match size {
                4 => s = smaller_names[0].into(),
                2 => s = smaller_names[1].into(),
                1 => s = smaller_names[2].into(),
                _ => panic!("show_ireg_sized: real"),
            }
        }
    } else {
        // Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
        let suffix = match size {
            4 => "l",
            2 => "w",
            1 => "b",
            _ => panic!("show_ireg_sized: virtual"),
        };
        s = s + suffix;
    }

    s
}
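
// A minimal usage sketch (not part of the original patch; it assumes the register
// constructors and `create_reg_universe_systemv` above are in scope via `super::*`):
// real registers are renamed to their narrower architectural forms, while virtual
// registers just get a width suffix.
#[cfg(test)]
mod show_ireg_sized_sketch {
    use super::*;

    #[test]
    fn narrows_real_register_names() {
        let univ = create_reg_universe_systemv(&settings::Flags::new(settings::builder()));
        // 8-byte queries are returned unchanged; narrower ones use the remapping table.
        assert_eq!(show_ireg_sized(rax(), Some(&univ), 8), "%rax");
        assert_eq!(show_ireg_sized(rax(), Some(&univ), 2), "%ax");
        assert_eq!(show_ireg_sized(r10(), Some(&univ), 1), "%r10b");
    }
}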
@@ -1,343 +0,0 @@
//! Lowering rules for X64.

#![allow(dead_code)]
#![allow(non_snake_case)]

use regalloc::{Reg, Writable};

use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};

use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;

use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;

/// Context passed to all lowering functions.
type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;

//=============================================================================
// Helpers for instruction lowering.

fn is_int_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

fn int_ty_to_is64(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 => false,
        types::I64 => true,
        _ => panic!("type {} is none of I8, I16, I32 or I64", ty),
    }
}

fn int_ty_to_sizeB(ty: Type) -> u8 {
    match ty {
        types::I8 => 1,
        types::I16 => 2,
        types::I32 => 4,
        types::I64 => 8,
        _ => panic!("ity_to_sizeB"),
    }
}

fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
    let inst_data = ctx.data(iri);
    if inst_data.opcode() == Opcode::Null {
        Some(0)
    } else {
        match inst_data {
            &InstructionData::UnaryImm { opcode: _, imm } => {
                // Only has Into for i64; we use u64 elsewhere, so we cast.
                let imm: i64 = imm.into();
                Some(imm as u64)
            }
            _ => None,
        }
    }
}

fn inst_condcode(data: &InstructionData) -> IntCC {
    match data {
        &InstructionData::IntCond { cond, .. }
        | &InstructionData::BranchIcmp { cond, .. }
        | &InstructionData::IntCompare { cond, .. }
        | &InstructionData::IntCondTrap { cond, .. }
        | &InstructionData::BranchInt { cond, .. }
        | &InstructionData::IntSelect { cond, .. }
        | &InstructionData::IntCompareImm { cond, .. } => cond,
        _ => panic!("inst_condcode(x64): unhandled: {:?}", data),
    }
}

fn intCC_to_x64_CC(cc: IntCC) -> CC {
    match cc {
        IntCC::Equal => CC::Z,
        IntCC::NotEqual => CC::NZ,
        IntCC::SignedGreaterThanOrEqual => CC::NL,
        IntCC::SignedGreaterThan => CC::NLE,
        IntCC::SignedLessThanOrEqual => CC::LE,
        IntCC::SignedLessThan => CC::L,
        IntCC::UnsignedGreaterThanOrEqual => CC::NB,
        IntCC::UnsignedGreaterThan => CC::NBE,
        IntCC::UnsignedLessThanOrEqual => CC::BE,
        IntCC::UnsignedLessThan => CC::B,
        IntCC::Overflow => CC::O,
        IntCC::NotOverflow => CC::NO,
    }
}

fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
    let inputs = ctx.get_input(iri, input);
    ctx.use_input_reg(inputs);
    inputs.reg
}

fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
    ctx.get_output(iri, output)
}
|
||||
|
||||
//=============================================================================
|
||||
// Top-level instruction lowering entry point, for one instruction.
|
||||
|
||||
/// Actually codegen an instruction's results into registers.
|
||||
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
|
||||
let op = ctx.data(iri).opcode();
|
||||
let ty = if ctx.num_outputs(iri) == 1 {
|
||||
Some(ctx.output_ty(iri, 0))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// This is all outstandingly feeble. TODO: much better!
|
||||
|
||||
match op {
|
||||
Opcode::Iconst => {
|
||||
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
|
||||
// Get exactly the bit pattern in 'w64' into the dest. No
|
||||
// monkeying with sign extension etc.
|
||||
let dstIs64 = w64 > 0xFFFF_FFFF;
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Iadd | Opcode::Isub => {
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
let regL = input_to_reg(ctx, iri, 0);
|
||||
let regR = input_to_reg(ctx, iri, 1);
|
||||
let is64 = int_ty_to_is64(ty.unwrap());
|
||||
let how = if op == Opcode::Iadd {
|
||||
RMI_R_Op::Add
|
||||
} else {
|
||||
RMI_R_Op::Sub
|
||||
};
|
||||
ctx.emit(Inst::mov_r_r(true, regL, regD));
|
||||
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
// TODO: implement imm shift value into insn
|
||||
let tySL = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
|
||||
let regSL = input_to_reg(ctx, iri, 0);
|
||||
let regSR = input_to_reg(ctx, iri, 1);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
|
||||
let how = match op {
|
||||
Opcode::Ishl => ShiftKind::Left,
|
||||
Opcode::Ushr => ShiftKind::RightZ,
|
||||
Opcode::Sshr => ShiftKind::RightS,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let is64 = tyD == types::I64;
|
||||
let r_rcx = regs::rcx();
|
||||
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
|
||||
ctx.emit(Inst::mov_r_r(true, regSL, regD));
|
||||
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
|
||||
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
|
||||
} else {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Uextend | Opcode::Sextend => {
|
||||
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
|
||||
// don't accept a register source operand. They should be changed
|
||||
// so as to have _RM_R form.
|
||||
// TODO2: if the source operand is a load, incorporate that.
|
||||
let isZX = op == Opcode::Uextend;
|
||||
let tyS = ctx.input_ty(iri, 0);
|
||||
let tyD = ctx.output_ty(iri, 0);
|
||||
let regS = input_to_reg(ctx, iri, 0);
|
||||
let regD = output_to_reg(ctx, iri, 0);
|
||||
ctx.emit(Inst::mov_r_r(true, regS, regD));
|
||||
match (tyS, tyD, isZX) {
|
||||
(types::I8, types::I64, false) => {
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
|
||||
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
|
||||
}
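// Worked example (illustrative, not in the original): with the low byte holding
// 0x80 (-128 as an i8), the `shl 56` produces 0x8000_0000_0000_0000 and the
// arithmetic `sar 56` then yields 0xFFFF_FFFF_FFFF_FF80, i.e. -128 as an i64,
// so the shift pair sign-extends the low 8 bits into the full register.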
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FallthroughReturn | Opcode::Return => {
|
||||
for i in 0..ctx.num_inputs(iri) {
|
||||
let src_reg = input_to_reg(ctx, iri, i);
|
||||
let retval_reg = ctx.retval(i);
|
||||
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
|
||||
}
|
||||
// N.B.: the Ret itself is generated by the ABI.
|
||||
}
|
||||
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::UdivImm
|
||||
| Opcode::SdivImm
|
||||
| Opcode::UremImm
|
||||
| Opcode::SremImm
|
||||
| Opcode::IrsubImm
|
||||
| Opcode::IaddCin
|
||||
| Opcode::IaddIfcin
|
||||
| Opcode::IaddCout
|
||||
| Opcode::IaddIfcout
|
||||
| Opcode::IaddCarry
|
||||
| Opcode::IaddIfcarry
|
||||
| Opcode::IsubBin
|
||||
| Opcode::IsubIfbin
|
||||
| Opcode::IsubBout
|
||||
| Opcode::IsubIfbout
|
||||
| Opcode::IsubBorrow
|
||||
| Opcode::IsubIfborrow
|
||||
| Opcode::BandImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BxorImm
|
||||
| Opcode::RotlImm
|
||||
| Opcode::RotrImm
|
||||
| Opcode::IshlImm
|
||||
| Opcode::UshrImm
|
||||
| Opcode::SshrImm => {
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Lowering-backend trait implementation.
|
||||
|
||||
impl LowerBackend for X64Backend {
|
||||
type MInst = Inst;
|
||||
|
||||
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
|
||||
lower_insn_to_regs(ctx, ir_inst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn lower_branch_group<C: LowerCtx<I = Inst>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
branches: &[IRInst],
|
||||
targets: &[MachLabel],
|
||||
fallthrough: Option<MachLabel>,
|
||||
) -> CodegenResult<()> {
|
||||
// A block should end with at most two branches. The first may be a
|
||||
// conditional branch; a conditional branch can be followed only by an
|
||||
// unconditional branch or fallthrough. Otherwise, if only one branch,
|
||||
// it may be an unconditional branch, a fallthrough, a return, or a
|
||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||
// verifier pass.
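// For example (illustration added here, not in the original): a block ending in
// `brz v0, block2 ; jump block3` arrives as a two-branch group, and is lowered
// below to a compare against zero plus a two-target conditional jump with
// taken = block2's label and not-taken = block3's label.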
|
||||
assert!(branches.len() <= 2);
|
||||
|
||||
let mut unimplemented = false;
|
||||
|
||||
if branches.len() == 2 {
|
||||
// Must be a conditional branch followed by an unconditional branch.
|
||||
let op0 = ctx.data(branches[0]).opcode();
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
|
||||
println!(
|
||||
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
|
||||
op0, op1
|
||||
);
|
||||
|
||||
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
|
||||
let taken = BranchTarget::Label(targets[0]);
|
||||
let not_taken = match op1 {
|
||||
Opcode::Jump => BranchTarget::Label(targets[1]),
|
||||
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
|
||||
_ => unreachable!(), // assert above.
|
||||
};
|
||||
match op0 {
|
||||
Opcode::Brz | Opcode::Brnz => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rS = input_to_reg(ctx, branches[0], 0);
|
||||
let cc = match op0 {
|
||||
Opcode::Brz => CC::Z,
|
||||
Opcode::Brnz => CC::NZ,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
Opcode::BrIcmp => {
|
||||
let tyS = ctx.input_ty(branches[0], 0);
|
||||
if is_int_ty(tyS) {
|
||||
let rSL = input_to_reg(ctx, branches[0], 0);
|
||||
let rSR = input_to_reg(ctx, branches[0], 1);
|
||||
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
|
||||
let sizeB = int_ty_to_sizeB(tyS);
|
||||
// FIXME verify rSR vs rSL ordering
|
||||
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
|
||||
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
|
||||
} else {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
// TODO: Brif/icmp, Brff/icmp, jump tables
|
||||
_ => {
|
||||
unimplemented = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert!(branches.len() == 1);
|
||||
|
||||
// Must be an unconditional branch or trap.
|
||||
let op = ctx.data(branches[0]).opcode();
|
||||
match op {
|
||||
Opcode::Jump => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Fallthrough => {
|
||||
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
|
||||
}
|
||||
Opcode::Trap => {
|
||||
unimplemented = true;
|
||||
}
|
||||
_ => panic!("Unknown branch type!"),
|
||||
}
|
||||
}
|
||||
|
||||
if unimplemented {
|
||||
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,112 +0,0 @@
|
|||
//! x86_64 Instruction Set Architecture.
|
||||
|
||||
use alloc::boxed::Box;
|
||||
|
||||
use regalloc::RealRegUniverse;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{self, Flags};
|
||||
|
||||
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
|
||||
|
||||
mod abi;
|
||||
mod inst;
|
||||
mod lower;
|
||||
|
||||
/// An X64 backend.
|
||||
pub(crate) struct X64Backend {
|
||||
triple: Triple,
|
||||
flags: Flags,
|
||||
reg_universe: RealRegUniverse,
|
||||
}
|
||||
|
||||
impl X64Backend {
|
||||
/// Create a new X64 backend with the given (shared) flags.
|
||||
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
|
||||
let reg_universe = create_reg_universe_systemv(&flags);
|
||||
Self {
|
||||
triple,
|
||||
flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
|
||||
// This performs lowering to VCode, register-allocates the code, computes
|
||||
// block layout and finalizes branches. The result is ready for binary emission.
|
||||
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
|
||||
compile::compile::<Self>(&func, self, abi)
|
||||
}
|
||||
}
|
||||
|
||||
impl MachBackend for X64Backend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let buffer = buffer.finish();
|
||||
let frame_size = vcode.frame_size();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
frame_size,
|
||||
disasm,
|
||||
})
|
||||
}
|
||||
|
||||
fn flags(&self) -> &Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
"x64"
|
||||
}
|
||||
|
||||
fn triple(&self) -> Triple {
|
||||
self.triple.clone()
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// overflow of an add.
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
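// Concrete instance (added for illustration): an `add` that wraps, e.g.
// 0xFFFF_FFFF_FFFF_FFFF + 1 producing 0, sets CF = 1 on x86, and "unsigned >="
// is the IntCC that corresponds to that carry-flag condition here.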
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
|
||||
// underflow of a subtract (carry is borrow for subtract).
|
||||
IntCC::UnsignedGreaterThanOrEqual
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `isa::Builder`.
|
||||
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
|
||||
let backend = X64Backend::new_with_flags(triple, flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
},
|
||||
}
|
||||
}
|
|
@ -6,6 +6,7 @@ use super::settings as isa_settings;
|
|||
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use crate::cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use crate::ir;
|
||||
use crate::ir::entities::StackSlot;
|
||||
use crate::ir::immediates::Imm64;
|
||||
use crate::ir::stackslot::{StackOffset, StackSize};
|
||||
use crate::ir::types;
|
||||
|
@ -18,6 +19,7 @@ use crate::regalloc::RegisterSet;
|
|||
use crate::result::CodegenResult;
|
||||
use crate::stack_layout::layout_stack;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::vec::Vec;
|
||||
use core::i32;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -42,7 +44,7 @@ static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
|
|||
///
|
||||
/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
|
||||
/// convention reserves spill space for parameters, you don’t have to use them as such"
|
||||
const WIN_SHADOW_STACK_SPACE: StackSize = 32;
|
||||
const WIN_SHADOW_STACK_SPACE: i32 = 32;
|
||||
|
||||
/// Stack alignment requirement for functions.
|
||||
///
|
||||
|
@ -70,7 +72,6 @@ struct Args {
|
|||
shared_flags: shared_settings::Flags,
|
||||
#[allow(dead_code)]
|
||||
isa_flags: isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
}
|
||||
|
||||
impl Args {
|
||||
|
@ -81,13 +82,12 @@ impl Args {
|
|||
call_conv: CallConv,
|
||||
shared_flags: &shared_settings::Flags,
|
||||
isa_flags: &isa_settings::Flags,
|
||||
assigning_returns: bool,
|
||||
) -> Self {
|
||||
let offset = if call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
} else {
|
||||
0
|
||||
};
|
||||
} as u32;
|
||||
|
||||
Self {
|
||||
pointer_bytes: bits / 8,
|
||||
|
@ -101,7 +101,6 @@ impl Args {
|
|||
call_conv,
|
||||
shared_flags: shared_flags.clone(),
|
||||
isa_flags: isa_flags.clone(),
|
||||
assigning_returns,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -110,17 +109,6 @@ impl ArgAssigner for Args {
|
|||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
let ty = arg.value_type;
|
||||
|
||||
if ty.bits() > u16::from(self.pointer_bits) {
|
||||
if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
|
||||
// "Any argument that doesn't fit in 8 bytes, or isn't
|
||||
// 1, 2, 4, or 8 bytes, must be passed by reference"
|
||||
return ValueConversion::Pointer(self.pointer_type).into();
|
||||
} else if !ty.is_vector() && !ty.is_float() {
|
||||
// On SystemV large integers and booleans are broken down to fit in a register.
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
}
|
||||
|
||||
// Vectors should stay in vector registers unless SIMD is not enabled--then they are split
|
||||
if ty.is_vector() {
|
||||
if self.shared_flags.enable_simd() {
|
||||
|
@ -131,6 +119,11 @@ impl ArgAssigner for Args {
|
|||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
|
||||
match arg.extension {
|
||||
|
@ -212,7 +205,7 @@ pub fn legalize_signature(
|
|||
PointerWidth::U16 => panic!(),
|
||||
PointerWidth::U32 => {
|
||||
bits = 32;
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
|
||||
args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags);
|
||||
}
|
||||
PointerWidth::U64 => {
|
||||
bits = 64;
|
||||
|
@ -224,7 +217,6 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
} else {
|
||||
Args::new(
|
||||
|
@ -234,7 +226,6 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
false,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
@ -254,20 +245,26 @@ pub fn legalize_signature(
|
|||
sig.call_conv,
|
||||
shared_flags,
|
||||
isa_flags,
|
||||
true,
|
||||
);
|
||||
|
||||
// If we don't have enough available return registers
|
||||
// to fit all of the return values, we need to backtrack and start
|
||||
let sig_is_multi_return = sig.is_multi_return();
|
||||
|
||||
// If this is a multi-value return and we don't have enough available return
|
||||
// registers to fit all of the return values, we need to backtrack and start
|
||||
// assigning locations all over again with a different strategy. In order to
|
||||
// do that, we need a copy of the original assigner for the returns.
|
||||
let mut backup_rets = rets.clone();
|
||||
let backup_rets_for_struct_return = if sig_is_multi_return {
|
||||
Some(rets.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
|
||||
if new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
if sig.is_multi_return()
|
||||
&& new_returns
|
||||
.iter()
|
||||
.filter(|r| r.purpose == ArgumentPurpose::Normal)
|
||||
.any(|r| !r.location.is_reg())
|
||||
{
|
||||
// The return values couldn't all fit into available return
|
||||
// registers. Introduce the use of a struct-return parameter.
|
||||
|
@ -279,7 +276,6 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match args.assign(&ret_ptr_param) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -289,6 +285,8 @@ pub fn legalize_signature(
|
|||
_ => unreachable!("return pointer should always get a register assignment"),
|
||||
}
|
||||
|
||||
let mut backup_rets = backup_rets_for_struct_return.unwrap();
|
||||
|
||||
// We're using the first return register for the return pointer (like
|
||||
// sys v does).
|
||||
let mut ret_ptr_return = AbiParam {
|
||||
|
@ -296,7 +294,6 @@ pub fn legalize_signature(
|
|||
purpose: ArgumentPurpose::StructReturn,
|
||||
extension: ArgumentExtension::None,
|
||||
location: ArgumentLoc::Unassigned,
|
||||
legalized_to_pointer: false,
|
||||
};
|
||||
match backup_rets.assign(&ret_ptr_return) {
|
||||
ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
|
||||
|
@ -504,7 +501,7 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) ->
|
|||
|
||||
let word_size = StackSize::from(isa.pointer_bytes());
|
||||
let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
|
||||
WIN_SHADOW_STACK_SPACE
|
||||
WIN_SHADOW_STACK_SPACE as u32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
@ -528,60 +525,50 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
|
||||
}
|
||||
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers
|
||||
// return address + frame pointer + all callee-saved registers + shadow space
|
||||
//
|
||||
// Pushing the return address is an implicit function of the `call`
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let csrs = callee_saved_regs_used(isa, func);
|
||||
let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
|
||||
let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
|
||||
let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
|
||||
let word_size = isa.pointer_bytes() as usize;
|
||||
let num_fprs = csrs.iter(FPR).len();
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
|
||||
|
||||
// FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
|
||||
if fpsr_stack_size > 0 {
|
||||
csr_stack_size = (csr_stack_size + 15) & !15;
|
||||
}
|
||||
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size,
|
||||
offset: Some(-(csr_stack_size as StackOffset)),
|
||||
});
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
|
||||
// If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
|
||||
if !is_leaf {
|
||||
// TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
// Only create an FPR stack slot if we're going to save FPRs.
|
||||
let fpr_slot = if num_fprs > 0 {
|
||||
// Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it
|
||||
// seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be
|
||||
// through `stack_load` and `stack_store` (see later comment about issue #1198). Even
|
||||
// though in a certain light FPR preserve/restore is "spilling" an argument, regalloc
|
||||
// implies that `SpillSlot` may be eligible for certain optimizations, and we know with
|
||||
// certainty that this space may not be reused in the function, nor moved around.
|
||||
Some(func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::ExplicitSlot,
|
||||
size: WIN_SHADOW_STACK_SPACE,
|
||||
size: (num_fprs * types::F64X2.bytes() as usize) as u32,
|
||||
offset: None,
|
||||
});
|
||||
}
|
||||
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||
|
||||
// Subtract the GPR saved register size from the local size because pushes are used for the saves
|
||||
let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = isa.pointer_type();
|
||||
let sp_arg_index = if fpsr_stack_size > 0 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
|
||||
// since cranelift does not support spill slots before incoming args
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)),
|
||||
});
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
|
||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = isa.pointer_type();
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -614,13 +601,19 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
sp_arg_index.is_some(),
|
||||
fpr_slot.as_ref(),
|
||||
isa,
|
||||
);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
insert_common_epilogues(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
fpr_slot.as_ref(),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -656,20 +649,6 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
|
||||
// Add CSRs to function signature
|
||||
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
|
||||
// On X86-32 all parameters, including vmctx, are passed on stack, and we need
|
||||
// to extract vmctx from the stack before we can save the frame pointer.
|
||||
let sp_arg_index = if isa.pointer_bits() == 32 {
|
||||
let sp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::CalleeSaved,
|
||||
RU::rsp as RegUnit,
|
||||
);
|
||||
let index = func.signature.params.len();
|
||||
func.signature.params.push(sp_arg);
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
reg_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
|
@ -687,18 +666,11 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||
// Set up the cursor and insert the prologue
|
||||
let entry_block = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
|
||||
insert_common_prologue(
|
||||
&mut pos,
|
||||
local_stack_size,
|
||||
reg_type,
|
||||
&csrs,
|
||||
sp_arg_index.is_some(),
|
||||
isa,
|
||||
);
|
||||
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
|
||||
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, None);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -710,18 +682,9 @@ fn insert_common_prologue(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
has_sp_param: bool,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let sp = if has_sp_param {
|
||||
let block = pos.current_block().expect("missing block under cursor");
|
||||
let sp = pos.func.dfg.append_block_param(block, reg_type);
|
||||
pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
|
||||
Some(sp)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// If this is a leaf function with zero stack, then there's no need to
|
||||
// insert a stack check since it can't overflow anything and
|
||||
// forward-progress is guaranteed so long as loops are handled anyway.
|
||||
|
@ -744,7 +707,7 @@ fn insert_common_prologue(
|
|||
None => pos
|
||||
.func
|
||||
.stack_limit
|
||||
.map(|gv| interpret_gv(pos, gv, sp, scratch)),
|
||||
.map(|gv| interpret_gv(pos, gv, scratch)),
|
||||
};
|
||||
if let Some(stack_limit_arg) = stack_limit_arg {
|
||||
insert_stack_check(pos, stack_size, stack_limit_arg);
|
||||
|
@ -817,27 +780,38 @@ fn insert_common_prologue(
|
|||
}
|
||||
}
|
||||
|
||||
// With the stack pointer adjusted, save any callee-saved floating point registers via offset
|
||||
// FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
|
||||
// Now that RSP is prepared for the function, we can use stack slots:
|
||||
let mut last_fpr_save = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Append param to entry block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
// `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function entry.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
let mut fpr_offset = 0;
|
||||
|
||||
last_fpr_save = Some(pos.ins().store(
|
||||
ir::MemFlags::trusted(),
|
||||
csr_arg,
|
||||
sp.expect("FPR save requires SP param"),
|
||||
(stack_size - offset) as i32,
|
||||
));
|
||||
for reg in csrs.iter(FPR) {
|
||||
// Append param to entry Block
|
||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||
|
||||
// Since regalloc has already run, we must assign a location.
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
last_fpr_save =
|
||||
Some(
|
||||
pos.ins()
|
||||
.store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset),
|
||||
);
|
||||
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
}
|
||||
}
|
||||
|
||||
pos.func.prologue_end = Some(
|
||||
|
@ -860,55 +834,19 @@ fn insert_common_prologue(
|
|||
/// compared to the stack pointer, but currently it serves enough functionality
|
||||
/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
|
||||
/// bit over time!
|
||||
fn interpret_gv(
|
||||
pos: &mut EncCursor,
|
||||
gv: ir::GlobalValue,
|
||||
sp: Option<ir::Value>,
|
||||
scratch: ir::ValueLoc,
|
||||
) -> ir::Value {
|
||||
fn interpret_gv(pos: &mut EncCursor, gv: ir::GlobalValue, scratch: ir::ValueLoc) -> ir::Value {
|
||||
match pos.func.global_values[gv] {
|
||||
ir::GlobalValueData::VMContext => {
|
||||
let vmctx_index = pos
|
||||
.func
|
||||
.signature
|
||||
.special_param_index(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found");
|
||||
match pos.func.signature.params[vmctx_index] {
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Reg(_),
|
||||
..
|
||||
} => {
|
||||
let entry = pos.func.layout.entry_block().unwrap();
|
||||
pos.func.dfg.block_params(entry)[vmctx_index]
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Stack(offset),
|
||||
value_type,
|
||||
..
|
||||
} => {
|
||||
let offset =
|
||||
offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
|
||||
// The following access can be marked `trusted` because it is a load of an argument. We
|
||||
// know it is safe because it was safe to write it in preparing this function call.
|
||||
let ret =
|
||||
pos.ins()
|
||||
.load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
|
||||
pos.func.locations[ret] = scratch;
|
||||
return ret;
|
||||
}
|
||||
AbiParam {
|
||||
location: ArgumentLoc::Unassigned,
|
||||
..
|
||||
} => unreachable!(),
|
||||
}
|
||||
}
|
||||
ir::GlobalValueData::VMContext => pos
|
||||
.func
|
||||
.special_param(ir::ArgumentPurpose::VMContext)
|
||||
.expect("no vmcontext parameter found"),
|
||||
ir::GlobalValueData::Load {
|
||||
base,
|
||||
offset,
|
||||
global_type,
|
||||
readonly: _,
|
||||
} => {
|
||||
let base = interpret_gv(pos, base, sp, scratch);
|
||||
let base = interpret_gv(pos, base, scratch);
|
||||
let ret = pos
|
||||
.ins()
|
||||
.load(global_type, ir::MemFlags::trusted(), base, offset);
|
||||
|
@ -973,13 +911,13 @@ fn insert_common_epilogues(
|
|||
stack_size: i64,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
sp_arg_index: Option<usize>,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
) {
|
||||
while let Some(block) = pos.next_block() {
|
||||
pos.goto_last_inst(block);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, sp_arg_index);
|
||||
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs, fpr_slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -993,8 +931,56 @@ fn insert_common_epilogue(
|
|||
pos: &mut EncCursor,
|
||||
reg_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
sp_arg_index: Option<usize>,
|
||||
fpr_slot: Option<&StackSlot>,
|
||||
) {
|
||||
// Even though instructions to restore FPRs are inserted first, we have to append them after
|
||||
// restored GPRs to satisfy parameter order in the return.
|
||||
let mut restored_fpr_values = Vec::new();
|
||||
|
||||
// Restore FPRs before we move RSP and invalidate stack slots.
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(fpr_slot) = fpr_slot {
|
||||
debug_assert!(csrs.iter(FPR).len() != 0);
|
||||
|
||||
// `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base
|
||||
// address. We are well after postopt could run, so load the CSR region base once here,
|
||||
// instead of hoping that the addr/store will be combined later.
|
||||
//
|
||||
// See also: https://github.com/bytecodealliance/wasmtime/pull/1198
|
||||
let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
// Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at
|
||||
// function exit.
|
||||
pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16);
|
||||
|
||||
let mut fpr_offset = 0;
|
||||
|
||||
for reg in csrs.iter(FPR) {
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
stack_addr,
|
||||
fpr_offset,
|
||||
);
|
||||
fpr_offset += types::F64X2.bytes() as i32;
|
||||
|
||||
// Unlike GPRs before, we don't need to step back after each restoration because FPR
|
||||
// restoration is order-insensitive. Furthermore: we want GPR restoration to begin
|
||||
// after FPR restoration, so that stack adjustments occur after we're done relying on
|
||||
// StackSlot validity.
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
restored_fpr_values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
sp_adjust_inst = Some(pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)));
|
||||
}
|
||||
|
||||
// Insert the pop of the frame pointer
|
||||
let fp_pop = pos.ins().x86_pop(reg_type);
|
||||
let fp_pop_inst = pos.prev_inst().unwrap();
|
||||
|
@ -1005,47 +991,13 @@ fn insert_common_epilogue(
|
|||
let mut first_csr_pop_inst = None;
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_pop = pos.ins().x86_pop(reg_type);
|
||||
first_csr_pop_inst = pos.prev_inst();
|
||||
assert!(first_csr_pop_inst.is_some());
|
||||
first_csr_pop_inst = Some(pos.prev_inst().unwrap());
|
||||
pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_pop);
|
||||
}
|
||||
|
||||
// Insert the adjustment of SP
|
||||
let mut sp_adjust_inst = None;
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
|
||||
sp_adjust_inst = pos.prev_inst();
|
||||
assert!(sp_adjust_inst.is_some());
|
||||
}
|
||||
|
||||
let mut first_fpr_load = None;
|
||||
if let Some(index) = sp_arg_index {
|
||||
let sp = pos
|
||||
.func
|
||||
.dfg
|
||||
.block_params(pos.func.layout.entry_block().unwrap())[index];
|
||||
|
||||
// Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
|
||||
for (i, reg) in csrs.iter(FPR).enumerate() {
|
||||
// Offset to where the register is saved relative to RSP, accounting for FPR save alignment
|
||||
let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
|
||||
+ (stack_size % types::F64X2.bytes() as i64);
|
||||
|
||||
let value = pos.ins().load(
|
||||
types::F64X2,
|
||||
ir::MemFlags::trusted(),
|
||||
sp,
|
||||
(stack_size - offset) as i32,
|
||||
);
|
||||
|
||||
first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
|
||||
|
||||
pos.func.locations[value] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
}
|
||||
} else {
|
||||
assert!(csrs.iter(FPR).len() == 0);
|
||||
for value in restored_fpr_values.into_iter() {
|
||||
pos.func.dfg.append_inst_arg(inst, value);
|
||||
}
|
||||
|
||||
pos.func.epilogues_start.push(
|
||||
|
|
|
@ -13,7 +13,6 @@ use crate::isa::encoding::base_size;
|
|||
use crate::isa::encoding::{Encoding, RecipeSizing};
|
||||
use crate::isa::RegUnit;
|
||||
use crate::isa::{self, TargetIsa};
|
||||
use crate::legalizer::expand_as_libcall;
|
||||
use crate::predicates;
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
|
@ -247,20 +246,6 @@ fn size_with_inferred_rex_for_inreg0_inreg1(
|
|||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
|
||||
fn size_with_inferred_rex_for_inreg1_inreg2(
|
||||
sizing: &RecipeSizing,
|
||||
_enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> u8 {
|
||||
// No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
|
||||
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|
||||
|| test_input(2, inst, divert, func, is_extended_reg);
|
||||
sizing.base_size + if needs_rex { 1 } else { 0 }
|
||||
}
|
||||
|
||||
/// Infers whether a dynamic REX prefix will be emitted, based on a single
|
||||
/// input register and a single output register.
|
||||
fn size_with_inferred_rex_for_inreg0_outreg0(
|
||||
|
@ -1196,10 +1181,10 @@ fn convert_extractlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::BinaryImm8 {
|
||||
if let ir::InstructionData::ExtractLane {
|
||||
opcode: ir::Opcode::Extractlane,
|
||||
arg,
|
||||
imm: lane,
|
||||
lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
// NOTE: the following legalization assumes that the upper bits of the XMM register do
|
||||
|
@ -1252,10 +1237,10 @@ fn convert_insertlane(
|
|||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::TernaryImm8 {
|
||||
if let ir::InstructionData::InsertLane {
|
||||
opcode: ir::Opcode::Insertlane,
|
||||
args: [vector, replacement],
|
||||
imm: lane,
|
||||
lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let value_type = pos.func.dfg.value_type(vector);
|
||||
|
@ -1270,7 +1255,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_insertps(vector, replacement, immediate)
|
||||
.x86_insertps(vector, immediate, replacement)
|
||||
}
|
||||
F64X2 => {
|
||||
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
|
||||
|
@ -1298,7 +1283,7 @@ fn convert_insertlane(
|
|||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_pinsr(vector, replacement, lane);
|
||||
.x86_pinsr(vector, lane, replacement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1333,39 +1318,6 @@ fn convert_ineg(
|
|||
}
|
||||
}
|
||||
|
||||
fn expand_dword_to_xmm<'f>(
|
||||
pos: &mut FuncCursor<'_>,
|
||||
arg: ir::Value,
|
||||
arg_type: ir::Type,
|
||||
) -> ir::Value {
|
||||
if arg_type == I64 {
|
||||
let (arg_lo, arg_hi) = pos.ins().isplit(arg);
|
||||
let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
|
||||
let arg = pos.ins().insertlane(arg, arg_hi, 1);
|
||||
let arg = pos.ins().raw_bitcast(I64X2, arg);
|
||||
arg
|
||||
} else {
|
||||
pos.ins().bitcast(I64X2, arg)
|
||||
}
|
||||
}
|
||||
|
||||
fn contract_dword_from_xmm<'f>(
|
||||
pos: &mut FuncCursor<'f>,
|
||||
inst: ir::Inst,
|
||||
ret: ir::Value,
|
||||
ret_type: ir::Type,
|
||||
) {
|
||||
if ret_type == I64 {
|
||||
let ret = pos.ins().raw_bitcast(I32X4, ret);
|
||||
let ret_lo = pos.ins().extractlane(ret, 0);
|
||||
let ret_hi = pos.ins().extractlane(ret, 1);
|
||||
pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
|
||||
} else {
|
||||
let ret = pos.ins().extractlane(ret, 0);
|
||||
pos.func.dfg.replace(inst).ireduce(ret_type, ret);
|
||||
}
|
||||
}
|
||||
|
||||
// Masks for i8x16 unsigned right shift.
|
||||
static USHR_MASKS: [u8; 128] = [
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||||
|
@ -1427,24 +1379,7 @@ fn convert_ushr(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psrl(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
@ -1511,76 +1446,12 @@ fn convert_ishl(
|
|||
} else if arg0_type.is_vector() {
|
||||
// x86 has encodings for these shifts.
|
||||
pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
|
||||
} else if arg0_type == I64 {
|
||||
// 64 bit shifts need to be legalized on x86_32.
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.has_sse41() {
|
||||
// if we have pinstrq/pextrq (SSE 4.1), legalize to that
|
||||
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
|
||||
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
|
||||
let shifted = pos.ins().x86_psll(value, amount);
|
||||
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
|
||||
} else {
|
||||
// otherwise legalize to libcall
|
||||
expand_as_libcall(inst, func, isa);
|
||||
}
|
||||
} else {
|
||||
// Everything else should be already legal.
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
|
||||
fn convert_i64x2_imul(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Imul,
|
||||
args: [arg0, arg1],
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty == I64X2 {
|
||||
let x86_isa = isa
|
||||
.as_any()
|
||||
.downcast_ref::<isa::x86::Isa>()
|
||||
.expect("the target ISA must be x86 at this point");
|
||||
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
|
||||
// If we have certain AVX512 features, we can lower this instruction simply.
|
||||
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
|
||||
} else {
|
||||
// Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
|
||||
// 64-bit lane into 32-bit high and low sections using shifting and then performs
|
||||
// the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
|
||||
// concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1).
|
||||
let high0 = pos.ins().ushr_imm(arg0, 32);
|
||||
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
|
||||
let high1 = pos.ins().ushr_imm(arg1, 32);
|
||||
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
|
||||
let addhigh = pos.ins().iadd(mul0, mul1);
|
||||
let high = pos.ins().ishl_imm(addhigh, 32);
|
||||
let low = pos.ins().x86_pmuludq(arg0, arg1);
|
||||
pos.func.dfg.replace(inst).iadd(low, high);
|
||||
}
|
||||
} else {
|
||||
unreachable!(
|
||||
"{} should be encodable; it cannot be legalized by convert_i64x2_imul",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn expand_tls_value(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
|
|
|
@ -23,7 +23,6 @@ use crate::result::CodegenResult;
|
|||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
|
@ -54,23 +53,12 @@ fn isa_constructor(
|
|||
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
|
||||
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
|
||||
};
|
||||
|
||||
let isa_flags = settings::Flags::new(&shared_flags, builder);
|
||||
|
||||
if isa_flags.use_new_backend() {
|
||||
#[cfg(not(feature = "x64"))]
|
||||
panic!("new backend x86 support not included by cargo features!");
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
super::x64::isa_builder(triple).finish(shared_flags)
|
||||
} else {
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags,
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
|
@ -185,10 +173,6 @@ impl TargetIsa for Isa {
|
|||
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
|
||||
Some(unwind::systemv::create_cie())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
|
|
|
@ -28,7 +28,22 @@ pub(crate) fn create_unwind_info(
|
|||
let mut prologue_size = 0;
|
||||
let mut unwind_codes = Vec::new();
|
||||
let mut found_end = false;
|
||||
let mut xmm_save_count: u8 = 0;
|
||||
|
||||
// Have we saved at least one FPR? if so, we might have to check additional constraints.
|
||||
let mut saved_fpr = false;
|
||||
|
||||
// In addition to the min offset for a callee-save, we need to know the offset from the
|
||||
// frame base to the stack pointer, so that we can record an unwind offset that spans only
|
||||
// to the end of callee-save space.
|
||||
let mut static_frame_allocation_size = 0u32;
|
||||
|
||||
// For the time being, FPR preservation is split into a stack_addr and later store/load.
|
||||
// Store the register used for stack store and ensure it is the same register with no
|
||||
// intervening changes to the frame size.
|
||||
let mut callee_save_region_reg = None;
|
||||
// Also record the callee-save region's offset from RSP, because it must be added to FPR
|
||||
// save offsets to compute an offset from the frame base.
|
||||
let mut callee_save_offset = None;
|
||||
|
||||
for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
|
||||
// x64 ABI prologues cannot exceed 255 bytes in length
|
||||
|
@ -45,6 +60,8 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::Unary { opcode, arg } => {
|
||||
match opcode {
|
||||
Opcode::X86Push => {
|
||||
static_frame_allocation_size += 8;
|
||||
|
||||
unwind_codes.push(UnwindCode::PushRegister {
|
||||
offset: unwind_offset,
|
||||
reg: GPR.index_of(func.locations[arg].unwrap_reg()) as u8,
|
||||
|
@ -53,6 +70,7 @@ pub(crate) fn create_unwind_info(
|
|||
Opcode::AdjustSpDown => {
|
||||
let stack_size =
|
||||
stack_size.expect("expected a previous stack size instruction");
|
||||
static_frame_allocation_size += stack_size;
|
||||
|
||||
// This is used when calling a stack check function
|
||||
// We need to track the assignment to RAX which has the size of the stack
|
||||
|
@ -67,6 +85,10 @@ pub(crate) fn create_unwind_info(
|
|||
InstructionData::CopySpecial { src, dst, .. } => {
|
||||
if let Some(frame_register) = frame_register {
|
||||
if src == (RU::rsp as RegUnit) && dst == frame_register {
|
||||
// Constructing an rbp-based stack frame, so the static frame
|
||||
// allocation restarts at 0 from here.
|
||||
static_frame_allocation_size = 0;
|
||||
|
||||
unwind_codes.push(UnwindCode::SetFramePointer {
|
||||
offset: unwind_offset,
|
||||
sp_offset: 0,
|
||||
|
@ -91,7 +113,7 @@ pub(crate) fn create_unwind_info(
|
|||
let imm: i64 = imm.into();
|
||||
assert!(imm <= core::u32::MAX as i64);
|
||||
|
||||
stack_size = Some(imm as u32);
|
||||
static_frame_allocation_size += imm as u32;
|
||||
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
offset: unwind_offset,
|
||||
|
@ -101,27 +123,52 @@ pub(crate) fn create_unwind_info(
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
InstructionData::StackLoad {
|
||||
opcode: Opcode::StackAddr,
|
||||
stack_slot,
|
||||
offset: _,
|
||||
} => {
|
||||
let result = func.dfg.inst_results(inst).get(0).unwrap();
|
||||
if let ValueLoc::Reg(frame_reg) = func.locations[*result] {
|
||||
callee_save_region_reg = Some(frame_reg);
|
||||
|
||||
// Figure out the offset in the call frame that `frame_reg` will have.
|
||||
let frame_size = func
|
||||
.stack_slots
|
||||
.layout_info
|
||||
.expect("func's stack slots have layout info if stack operations exist")
|
||||
.frame_size;
|
||||
// Because we're well after the prologue has been constructed, stack slots
|
||||
// must have been laid out...
|
||||
let slot_offset = func.stack_slots[stack_slot]
|
||||
.offset
|
||||
.expect("callee-save slot has an offset computed");
|
||||
let frame_offset = frame_size as i32 + slot_offset;
|
||||
|
||||
callee_save_offset = Some(frame_offset as u32);
|
||||
}
|
||||
}
|
||||
InstructionData::Store {
|
||||
opcode: Opcode::Store,
|
||||
args: [arg1, arg2],
|
||||
flags: _flags,
|
||||
offset,
|
||||
..
|
||||
} => {
|
||||
if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
|
||||
if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) =
|
||||
(func.locations[arg1], func.locations[arg2])
|
||||
{
|
||||
// If this is a save of an FPR, record an unwind operation
|
||||
// Note: the stack_offset here is relative to an adjusted SP
|
||||
// This will be fixed up later to be based on the frame pointer offset
|
||||
if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
|
||||
let offset: i32 = offset.into();
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: src as u8,
|
||||
stack_offset: offset as u32,
|
||||
});
|
||||
|
||||
xmm_save_count += 1;
|
||||
if Some(base_ru) == callee_save_region_reg {
|
||||
let offset_int: i32 = offset.into();
|
||||
assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error");
|
||||
let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR preservation requires an FPR save region, which has some stack offset");
|
||||
if FPR.contains(ru) {
|
||||
saved_fpr = true;
|
||||
unwind_codes.push(UnwindCode::SaveXmm {
|
||||
offset: unwind_offset,
|
||||
reg: ru as u8,
|
||||
stack_offset: offset_int,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -136,45 +183,41 @@ pub(crate) fn create_unwind_info(
|
|||
|
||||
assert!(found_end);
|
||||
|
||||
// When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame
|
||||
// register minus some offset, forming a "base address". This attempts to calculate the frame register offset
|
||||
// while updating the XMM save offsets to be relative from this "base address" rather than RSP.
|
||||
let mut frame_register_offset = 0;
|
||||
if frame_register.is_some() && xmm_save_count > 0 {
|
||||
// Determine the number of 16-byte slots used for all CSRs (including GPRs)
|
||||
// The "frame register offset" will point at the last slot used (i.e. the last saved FPR)
|
||||
// Assumption: each FPR is stored at a lower address than the previous one
|
||||
let mut last_stack_offset = None;
|
||||
let mut fpr_save_count: u8 = 0;
|
||||
let mut gpr_push_count: u8 = 0;
|
||||
for code in unwind_codes.iter_mut() {
|
||||
match code {
|
||||
UnwindCode::SaveXmm { stack_offset, .. } => {
|
||||
if let Some(last) = last_stack_offset {
|
||||
assert!(last > *stack_offset);
|
||||
}
|
||||
last_stack_offset = Some(*stack_offset);
|
||||
fpr_save_count += 1;
|
||||
*stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16;
|
||||
}
|
||||
UnwindCode::PushRegister { .. } => {
|
||||
gpr_push_count += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if saved_fpr {
|
||||
if static_frame_allocation_size > 240 && saved_fpr {
|
||||
warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \
|
||||
This is a Cranelift implementation limit, see \
|
||||
https://github.com/bytecodealliance/wasmtime/issues/1475",
|
||||
static_frame_allocation_size);
|
||||
return Err(CodegenError::ImplLimitExceeded);
|
||||
}
|
||||
assert_eq!(fpr_save_count, xmm_save_count);
|
||||
|
||||
// Account for alignment space when there's an odd number of GPR pushes
|
||||
// Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division
|
||||
frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2);
|
||||
// Only check that the static frame size is 16-byte aligned when an FPR is saved, to avoid
|
||||
// panicking when alignment is elided because no FPRs are saved and no child calls are
|
||||
// made.
|
||||
assert!(
|
||||
static_frame_allocation_size % 16 == 0,
|
||||
"static frame allocation must be a multiple of 16"
|
||||
);
|
||||
}
|
||||
|
||||
// Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at
|
||||
// one end of the call frame, and RSP at the other, required offsets are arbitrarily large.
|
||||
// Windows x64 SEH only allows this offset to be up to 240 bytes, however, meaning large
|
||||
// frames are inexpressible, and we cannot actually compile the function. In case there are
|
||||
// no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing
|
||||
// will actually check it. This, then, avoids panics when compiling functions with large
|
||||
// call frames.
|
||||
let reported_frame_offset = if saved_fpr {
|
||||
(static_frame_allocation_size / 16) as u8
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Ok(Some(UnwindInfo {
|
||||
flags: 0, // this assumes cranelift functions have no SEH handlers
|
||||
prologue_size: prologue_size as u8,
|
||||
frame_register: frame_register.map(|r| GPR.index_of(r) as u8),
|
||||
frame_register_offset,
|
||||
frame_register_offset: reported_frame_offset,
|
||||
unwind_codes,
|
||||
}))
|
||||
}
|
||||
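As a worked example of the slot arithmetic above (hypothetical counts, not taken from the patch): with 3 FPR saves and 5 GPR pushes, the frame-register offset is 3 + (5 + 1) / 2 = 6 sixteen-byte slots, i.e. 96 bytes, comfortably below the 240-byte SEH limit mentioned in the comments.

// Worked example of the slot arithmetic used for frame_register_offset above
// (hypothetical counts; each FPR takes one 16-byte slot, two 8-byte GPR pushes
// share one slot, rounded up).
fn frame_register_offset_slots(fpr_save_count: u8, gpr_push_count: u8) -> u8 {
    fpr_save_count + ((gpr_push_count + 1) / 2)
}

fn main() {
    let slots = frame_register_offset_slots(3, 5);
    assert_eq!(slots, 6);               // 6 sixteen-byte slots...
    assert_eq!(slots as u32 * 16, 96);  // ...= 96 bytes, under the 240-byte SEH limit.
}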
|
@ -241,7 +284,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 9,
|
||||
size: 64
|
||||
size: 64 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -260,7 +303,7 @@ mod tests {
|
|||
0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg)
|
||||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x09, // Prolog offset
|
||||
0x72, // Operation 2 (small stack alloc), size = 0x7 slots (e.g. (0x7 * 8) + 8 = 64 bytes)
|
||||
0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
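The expected bytes in this test follow the Windows x64 small-allocation unwind encoding referenced in the comments: the low nibble is the operation (2 = small stack alloc) and the high nibble is `info`, with size = info * 8 + 8. A quick standalone check of that packing (a sketch, not the codegen under test):

// Pack a UWOP_ALLOC_SMALL byte: low nibble = operation 2, high nibble = info,
// where the allocation size is info * 8 + 8 (valid for 8..=128 bytes).
fn small_alloc_unwind_byte(size_bytes: u32) -> u8 {
    assert!(size_bytes >= 8 && size_bytes <= 128 && size_bytes % 8 == 0);
    let info = (size_bytes / 8 - 1) as u8;
    (info << 4) | 0x2
}

fn main() {
    assert_eq!(small_alloc_unwind_byte(64), 0x72); // the old expected byte
    assert_eq!(small_alloc_unwind_byte(96), 0xB2); // the new expected byte (64 + 32)
}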
|
@ -306,7 +349,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 10000
|
||||
size: 10000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -326,8 +369,8 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
|
||||
0xE2, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
|
||||
0xE6, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E6 * 8 = 10032 (10000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
@ -371,7 +414,7 @@ mod tests {
|
|||
},
|
||||
UnwindCode::StackAlloc {
|
||||
offset: 27,
|
||||
size: 1000000
|
||||
size: 1000000 + 32
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -391,10 +434,10 @@ mod tests {
|
|||
0x05, // Frame register + offset (RBP with 0 offset)
|
||||
0x1B, // Prolog offset
|
||||
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
|
||||
0x40, // Byte 1 of size
|
||||
0x60, // Byte 1 of size
|
||||
0x42, // Byte 2 of size
|
||||
0x0F, // Byte 3 of size
|
||||
0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
|
||||
0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
|
||||
0x05, // Prolog offset
|
||||
0x03, // Operation 3 (save frame register), stack pointer offset = 0
|
||||
0x02, // Prolog offset
|
||||
|
|
|
@ -504,13 +504,6 @@ where
|
|||
// this value.
|
||||
pos.ins().with_results([into_result]).ireduce(ty, arg)
|
||||
}
|
||||
// ABI argument is a pointer to the value we want.
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
|
||||
pos.ins()
|
||||
.with_results([into_result])
|
||||
.load(ty, MemFlags::new(), arg, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -570,18 +563,6 @@ fn convert_to_abi<PutArg>(
|
|||
let arg = pos.ins().uextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Pointer(abi_ty) => {
|
||||
// Note: This conversion can only happen for call arguments,
|
||||
// so we can allocate the value on stack safely.
|
||||
let stack_slot = pos.func.create_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: ty.bytes(),
|
||||
offset: None,
|
||||
});
|
||||
let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0);
|
||||
pos.ins().store(MemFlags::new(), value, arg, 0);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -776,6 +757,12 @@ pub fn handle_call_abi(
|
|||
{
|
||||
legalize_sret_call(isa, pos, sig_ref, inst);
|
||||
} else {
|
||||
// OK, we need to fix the call arguments to match the ABI signature.
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
|
||||
inst = legalize_inst_results(pos, |func, abi_res| {
|
||||
func.dfg.signatures[sig_ref].returns[abi_res]
|
||||
|
@ -783,13 +770,6 @@ pub fn handle_call_abi(
|
|||
}
|
||||
}
|
||||
|
||||
// Go back and fix the call arguments to match the ABI signature.
|
||||
pos.goto_inst(inst);
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
debug_assert!(
|
||||
check_call_signature(&pos.func.dfg, inst).is_ok(),
|
||||
"Signature still wrong: {}, {}{}",
|
||||
|
@ -834,12 +814,7 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
|
|||
pos.use_srcloc(inst);
|
||||
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
let arg = func.signature.returns[abi_arg];
|
||||
debug_assert!(
|
||||
!arg.legalized_to_pointer,
|
||||
"Return value cannot be legalized to pointer"
|
||||
);
|
||||
arg
|
||||
func.signature.returns[abi_arg]
|
||||
});
|
||||
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
|
||||
// the legalized signature. These values should simply be propagated from the entry block
|
||||
|
|
|
@ -35,7 +35,7 @@ mod table;
|
|||
use self::call::expand_call;
|
||||
use self::globalvalue::expand_global_value;
|
||||
use self::heap::expand_heap_addr;
|
||||
pub(crate) use self::libcall::expand_as_libcall;
|
||||
use self::libcall::expand_as_libcall;
|
||||
use self::table::expand_table_addr;
|
||||
|
||||
enum LegalizeInstResult {
|
||||
|
|
|
@ -99,12 +99,12 @@ mod iterators;
|
|||
mod legalizer;
|
||||
mod licm;
|
||||
mod nan_canonicalization;
|
||||
mod num_uses;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod redundant_reload_remover;
|
||||
mod regalloc;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
|
@ -114,9 +114,6 @@ mod topo_order;
|
|||
mod unreachable_code;
|
||||
mod value_label;
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod peepmatic;
|
||||
|
||||
pub use crate::result::{CodegenError, CodegenResult};
|
||||
|
||||
/// Version number of this crate.
|
||||
|
|
|
@ -12,15 +12,6 @@ pub trait ABIBody {
|
|||
/// The instruction type for the ISA associated with this ABI.
|
||||
type I: VCodeInst;
|
||||
|
||||
/// Does the ABI-body code need a temp reg? One will be provided to `init()`
|
||||
/// as the `maybe_tmp` arg if so.
|
||||
fn temp_needed(&self) -> bool;
|
||||
|
||||
/// Initialize. This is called after the ABIBody is constructed because it
|
||||
/// may be provided with a temp vreg, which can only be allocated once the
|
||||
/// lowering context exists.
|
||||
fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
|
||||
|
||||
/// Get the settings controlling this function's compilation.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
|
@ -43,13 +34,6 @@ pub trait ABIBody {
|
|||
/// register.
|
||||
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Generate any setup instruction needed to save values to the
|
||||
/// return-value area. This is usually used when there are multiple return
|
||||
/// values or an otherwise large return value that must be passed on the
|
||||
/// stack; typically the ABI specifies an extra hidden argument that is a
|
||||
/// pointer to that memory.
|
||||
fn gen_retval_area_setup(&self) -> Option<Self::I>;
|
||||
|
||||
/// Generate an instruction which copies a source register to a return value slot.
|
||||
fn gen_copy_reg_to_retval(
|
||||
&self,
|
||||
|
@ -114,10 +98,7 @@ pub trait ABIBody {
|
|||
fn gen_epilogue(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Returns the full frame size for the given function, after prologue emission has run. This
|
||||
/// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
|
||||
/// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
|
||||
/// This is used for ABI variants where the client generates prologue/epilogue code, as in
|
||||
/// Baldrdash (SpiderMonkey integration).
|
||||
/// comprises the spill space, incoming argument space, alignment padding, etc.
|
||||
fn frame_size(&self) -> u32;
|
||||
|
||||
/// Get the spill-slot size.
|
||||
|
@ -151,29 +132,24 @@ pub trait ABICall {
|
|||
/// Get the number of arguments expected.
|
||||
fn num_args(&self) -> usize;
|
||||
|
||||
/// Emit a copy of an argument value from a source register, prior to the call.
|
||||
fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
/// Copy an argument value from a source register, prior to the call.
|
||||
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
from_reg: Reg,
|
||||
);
|
||||
) -> Vec<Self::I>;
|
||||
|
||||
/// Emit a copy of a return value into a destination register, after the call returns.
|
||||
fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
|
||||
&self,
|
||||
ctx: &mut C,
|
||||
idx: usize,
|
||||
into_reg: Writable<Reg>,
|
||||
);
|
||||
/// Copy a return value into a destination register, after the call returns.
|
||||
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
|
||||
|
||||
/// Emit code to pre-adjust the stack, prior to argument copies and call.
|
||||
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
/// Pre-adjust the stack, prior to argument copies and call.
|
||||
fn gen_stack_pre_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Emit code to post-adjust the stack, after call return and return-value copies.
|
||||
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
/// Post-adjust the stack, after call return and return-value copies.
|
||||
fn gen_stack_post_adjust(&self) -> Vec<Self::I>;
|
||||
|
||||
/// Emit the call itself.
|
||||
/// Generate the call itself.
|
||||
///
|
||||
/// The returned instruction should have proper use- and def-sets according
|
||||
/// to the argument registers, return-value registers, and clobbered
|
||||
|
@ -183,8 +159,5 @@ pub trait ABICall {
|
|||
/// registers are also logically defs, but should never be read; their
|
||||
/// values are "defined" (to the regalloc) but "undefined" in every other
|
||||
/// sense.)
|
||||
///
|
||||
/// This function should only be called once, as it is allowed to re-use
|
||||
/// parts of the ABICall object in emitting instructions.
|
||||
fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
|
||||
fn gen_call(&self) -> Vec<Self::I>;
|
||||
}
|
||||
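Taken together, the reshaped `ABICall` methods imply a call-emission sequence of stack pre-adjust, argument copies, the call itself, return-value copies, and stack post-adjust. The sketch below illustrates that ordering with simplified stand-in types; `MiniAbiCall`, the string "instructions", and `MockCall` are hypothetical, not Cranelift's actual API:

// Simplified stand-in for the ABICall surface above; instructions are strings.
trait MiniAbiCall {
    fn num_args(&self) -> usize;
    fn gen_stack_pre_adjust(&self) -> Vec<String>;
    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: u8) -> Vec<String>;
    fn gen_call(&self) -> Vec<String>;
    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: u8) -> String;
    fn gen_stack_post_adjust(&self) -> Vec<String>;
}

/// Emit a full call sequence in the order the trait documentation implies:
/// pre-adjust SP, copy arguments, call, copy return values, post-adjust SP.
fn emit_call_sequence<A: MiniAbiCall>(abi: &A, arg_regs: &[u8], ret_regs: &[u8]) -> Vec<String> {
    let mut insts = abi.gen_stack_pre_adjust();
    for (idx, &reg) in arg_regs.iter().enumerate().take(abi.num_args()) {
        insts.extend(abi.gen_copy_reg_to_arg(idx, reg));
    }
    insts.extend(abi.gen_call());
    for (idx, &reg) in ret_regs.iter().enumerate() {
        insts.push(abi.gen_copy_retval_to_reg(idx, reg));
    }
    insts.extend(abi.gen_stack_post_adjust());
    insts
}

struct MockCall { args: usize }

impl MiniAbiCall for MockCall {
    fn num_args(&self) -> usize { self.args }
    fn gen_stack_pre_adjust(&self) -> Vec<String> { vec!["sub rsp, 32".into()] }
    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: u8) -> Vec<String> {
        vec![format!("mov arg{}, r{}", idx, from_reg)]
    }
    fn gen_call(&self) -> Vec<String> { vec!["call f".into()] }
    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: u8) -> String {
        format!("mov r{}, ret{}", into_reg, idx)
    }
    fn gen_stack_post_adjust(&self) -> Vec<String> { vec!["add rsp, 32".into()] }
}

fn main() {
    let insts = emit_call_sequence(&MockCall { args: 2 }, &[3, 4], &[0]);
    assert_eq!(insts.first().unwrap(), "sub rsp, 32");
    assert_eq!(insts.last().unwrap(), "add rsp, 32");
}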
|
|
|
@ -10,7 +10,6 @@ use crate::settings::Flags;
|
|||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
use core::any::Any;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use target_lexicon::Triple;
|
||||
|
@ -128,8 +127,4 @@ impl TargetIsa for TargetIsaAdapter {
|
|||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||
self.backend.unsigned_sub_overflow_condition()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self as &dyn Any
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,624 +1,59 @@
|
|||
//! Computation of basic block order in emitted code.
|
||||
//!
|
||||
//! This module handles the translation from CLIF BBs to VCode BBs.
|
||||
//!
|
||||
//! The basic idea is that we compute a sequence of "lowered blocks" that
|
||||
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
|
||||
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
|
||||
//! moves for phi-nodes on every block-to-block transfer; these blocks always
|
||||
//! conceptually exist, but may be merged with an "original" CLIF block (and
|
||||
//! hence not actually exist; this is equivalent to inserting the blocks only on
|
||||
//! critical edges).
|
||||
//!
|
||||
//! In other words, starting from a CFG like this (where each "CLIF block" and
|
||||
//! "(edge N->M)" is a separate basic block):
|
||||
//!
|
||||
//! ```plain
|
||||
//!
|
||||
//! CLIF block 0
|
||||
//! / \
|
||||
//! (edge 0->1) (edge 0->2)
|
||||
//! | |
|
||||
//! CLIF block 1 CLIF block 2
|
||||
//! \ /
|
||||
//! (edge 1->3) (edge 2->3)
|
||||
//! \ /
|
||||
//! CLIF block 3
|
||||
//! ```
|
||||
//!
|
||||
//! We can produce a CFG of lowered blocks like so:
|
||||
//!
|
||||
//! ```plain
|
||||
//! +--------------+
|
||||
//! | CLIF block 0 |
|
||||
//! +--------------+
|
||||
//! / \
|
||||
//! +--------------+ +--------------+
|
||||
//! | (edge 0->1) | |(edge 0->2) |
|
||||
//! | CLIF block 1 | | CLIF block 2 |
|
||||
//! +--------------+ +--------------+
|
||||
//! \ /
|
||||
//! +-----------+ +-----------+
|
||||
//! |(edge 1->3)| |(edge 2->3)|
|
||||
//! +-----------+ +-----------+
|
||||
//! \ /
|
||||
//! +------------+
|
||||
//! |CLIF block 3|
|
||||
//! +------------+
|
||||
//! ```
|
||||
//!
|
||||
//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
|
||||
//! blocks' original bodies, but the out-edges could not because for simplicity
|
||||
//! in the successor-function definition, we only ever merge an edge onto one
|
||||
//! side of an original CLIF block.)
|
||||
//!
|
||||
//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
|
||||
//! block prepended or appended with an edge block (never both, though), or just
|
||||
//! an edge block.
|
||||
//!
|
||||
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
|
||||
//! (never actually materialized, just defined by a "successors" function), and
|
||||
//! compute the reverse postorder.
|
||||
//!
|
||||
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
|
||||
//! example, consider any information about whether edge blocks will actually
|
||||
//! have content, because this computation happens as part of lowering *before*
|
||||
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
|
||||
//! particular edge. But it works relatively well and is conceptually simple.
|
||||
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
|
||||
//! branch editing that in practice elides empty blocks and simplifies some of
|
||||
//! the other redundancies that this scheme produces.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::fx::{FxHashMap, FxHashSet};
|
||||
use crate::ir::{Block, Function, Inst, Opcode};
|
||||
use crate::machinst::lower::visit_block_succs;
|
||||
use crate::machinst::*;
|
||||
use regalloc::{BlockIx, Function};
|
||||
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Mapping from CLIF BBs to VCode BBs.
|
||||
#[derive(Debug)]
|
||||
pub struct BlockLoweringOrder {
|
||||
/// Lowered blocks, in BlockIndex order. Each block is some combination of
|
||||
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
|
||||
/// see [LoweredBlock] for details.
|
||||
lowered_order: Vec<LoweredBlock>,
|
||||
/// Successors for all lowered blocks, in one serialized vector. Indexed by
|
||||
/// the ranges in `lowered_succ_ranges`.
|
||||
lowered_succs: Vec<(Inst, LoweredBlock)>,
|
||||
/// BlockIndex values for successors for all lowered blocks, in the same
|
||||
/// order as `lowered_succs`.
|
||||
lowered_succ_indices: Vec<(Inst, BlockIndex)>,
|
||||
/// Ranges in `lowered_succs` giving the successor lists for each lowered
|
||||
/// block. Indexed by lowering-order index (`BlockIndex`).
|
||||
lowered_succ_ranges: Vec<(usize, usize)>,
|
||||
/// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
|
||||
/// some CLIF BBs may not be lowered; in particular, we skip unreachable
|
||||
/// blocks.
|
||||
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
|
||||
/// Simple reverse postorder-based block order emission.
|
||||
///
|
||||
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
|
||||
/// construction algorithm.
|
||||
struct BlockRPO {
|
||||
visited: Vec<bool>,
|
||||
postorder: Vec<BlockIndex>,
|
||||
deferred_last: Option<BlockIndex>,
|
||||
}
|
||||
|
||||
/// The origin of a block in the lowered block-order: either an original CLIF
|
||||
/// block, or an inserted edge-block, or a combination of the two if an edge is
|
||||
/// non-critical.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum LoweredBlock {
|
||||
/// Block in original CLIF, with no merged edge-blocks.
|
||||
Orig {
|
||||
/// Original CLIF block.
|
||||
block: Block,
|
||||
},
|
||||
/// Block in the original CLIF, plus edge-block to one succ (which is the
|
||||
/// one successor of the original block).
|
||||
OrigAndEdge {
|
||||
/// The original CLIF block contained in this lowered block.
|
||||
block: Block,
|
||||
/// The edge (jump) instruction transitioning from this block
|
||||
/// to the next, i.e., corresponding to the included edge-block. This
|
||||
/// will be an instruction in `block`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
/// Block in the original CLIF, preceded by edge-block from one pred (which
|
||||
/// is the one pred of the original block).
|
||||
EdgeAndOrig {
|
||||
/// The previous CLIF block, i.e., the edge block's predecessor.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to the included
|
||||
/// edge-block. This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The original CLIF block included in this lowered block.
|
||||
block: Block,
|
||||
},
|
||||
/// Split critical edge between two CLIF blocks. This lowered block does not
|
||||
/// correspond to any original CLIF blocks; it only serves as an insertion
|
||||
/// point for work to happen on the transition from `pred` to `succ`.
|
||||
Edge {
|
||||
/// The predecessor CLIF block.
|
||||
pred: Block,
|
||||
/// The edge (jump) instruction corresponding to this edge's transition.
|
||||
/// This will be an instruction in `pred`.
|
||||
edge_inst: Inst,
|
||||
/// The successor CLIF block.
|
||||
succ: Block,
|
||||
},
|
||||
}
|
||||
|
||||
impl LoweredBlock {
|
||||
/// The associated original (CLIF) block included in this lowered block, if
|
||||
/// any.
|
||||
pub fn orig_block(self) -> Option<Block> {
|
||||
match self {
|
||||
LoweredBlock::Orig { block, .. }
|
||||
| LoweredBlock::OrigAndEdge { block, .. }
|
||||
| LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
|
||||
LoweredBlock::Edge { .. } => None,
|
||||
impl BlockRPO {
|
||||
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
|
||||
BlockRPO {
|
||||
visited: vec![false; vcode.num_blocks()],
|
||||
postorder: vec![],
|
||||
deferred_last: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The associated in-edge, if any.
|
||||
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred,
|
||||
edge_inst,
|
||||
block,
|
||||
} => Some((pred, edge_inst, block)),
|
||||
_ => None,
|
||||
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
|
||||
self.visited[block as usize] = true;
|
||||
for succ in vcode.succs(block) {
|
||||
if !self.visited[*succ as usize] {
|
||||
self.visit(vcode, *succ);
|
||||
}
|
||||
}
|
||||
|
||||
for i in vcode.block_insns(BlockIx::new(block)) {
|
||||
if vcode.get_insn(i).is_epilogue_placeholder() {
|
||||
debug_assert!(self.deferred_last.is_none());
|
||||
self.deferred_last = Some(block);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.postorder.push(block);
|
||||
}
|
||||
|
||||
/// The associated out-edge, if any. Also includes edge-only blocks.
|
||||
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
|
||||
match self {
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((block, edge_inst, succ)),
|
||||
LoweredBlock::Edge {
|
||||
pred,
|
||||
edge_inst,
|
||||
succ,
|
||||
} => Some((pred, edge_inst, succ)),
|
||||
_ => None,
|
||||
fn rpo(self) -> Vec<BlockIndex> {
|
||||
let mut rpo = self.postorder;
|
||||
rpo.reverse();
|
||||
if let Some(block) = self.deferred_last {
|
||||
rpo.push(block);
|
||||
}
|
||||
rpo
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLoweringOrder {
|
||||
/// Compute and return a lowered block order for `f`.
|
||||
pub fn new(f: &Function) -> BlockLoweringOrder {
|
||||
debug!("BlockLoweringOrder: function body {:?}", f);
|
||||
|
||||
// Step 1: compute the in-edge and out-edge count of every block.
|
||||
let mut block_in_count = SecondaryMap::with_default(0);
|
||||
let mut block_out_count = SecondaryMap::with_default(0);
|
||||
|
||||
// Cache the block successors to avoid re-examining branches below.
|
||||
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
|
||||
let mut block_succ_range = SecondaryMap::with_default((0, 0));
|
||||
let mut fallthrough_return_block = None;
|
||||
for block in f.layout.blocks() {
|
||||
let block_succ_start = block_succs.len();
|
||||
visit_block_succs(f, block, |inst, succ| {
|
||||
block_out_count[block] += 1;
|
||||
block_in_count[succ] += 1;
|
||||
block_succs.push((inst, succ));
|
||||
});
|
||||
let block_succ_end = block_succs.len();
|
||||
block_succ_range[block] = (block_succ_start, block_succ_end);
|
||||
|
||||
for inst in f.layout.block_likely_branches(block) {
|
||||
if f.dfg[inst].opcode() == Opcode::Return {
|
||||
// Implicit output edge for any return.
|
||||
block_out_count[block] += 1;
|
||||
}
|
||||
if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
|
||||
// Fallthrough return block must come last.
|
||||
debug_assert!(fallthrough_return_block == None);
|
||||
fallthrough_return_block = Some(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Implicit input edge for entry block.
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
block_in_count[entry] += 1;
|
||||
}
|
||||
|
||||
// Here we define the implicit CLIF-plus-edges graph. There are
|
||||
// conceptually two such graphs: the original, with every edge explicit,
|
||||
// and the merged one, with blocks (represented by `LoweredBlock`
|
||||
// values) that contain original CLIF blocks, edges, or both. This
|
||||
// function returns a lowered block's successors as per the latter, with
|
||||
// consideration to edge-block merging.
|
||||
//
|
||||
// Note that there is a property of the block-merging rules below
|
||||
// that is very important to ensure we don't miss any lowered blocks:
|
||||
// any block in the implicit CLIF-plus-edges graph will *only* be
|
||||
// included in one block in the merged graph.
|
||||
//
|
||||
// This, combined with the property that every edge block is reachable
|
||||
// only from one predecessor (and hence cannot be reached by a DFS
|
||||
// backedge), means that it is sufficient in our DFS below to track
|
||||
// visited-bits per original CLIF block only, not per edge. This greatly
|
||||
// simplifies the data structures (no need to keep a sparse hash-set of
|
||||
// (block, block) tuples).
|
||||
let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
|
||||
let start_idx = ret.len();
|
||||
match block {
|
||||
LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
|
||||
// At an orig block; successors are always edge blocks,
|
||||
// possibly with orig blocks following.
|
||||
let range = block_succ_range[block];
|
||||
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
|
||||
if block_in_count[succ] == 1 {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::EdgeAndOrig {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
block: succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::Edge {
|
||||
pred: block,
|
||||
edge_inst,
|
||||
succ,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
LoweredBlock::Edge {
|
||||
succ, edge_inst, ..
|
||||
}
|
||||
| LoweredBlock::OrigAndEdge {
|
||||
succ, edge_inst, ..
|
||||
} => {
|
||||
// At an edge block; successors are always orig blocks,
|
||||
// possibly with edge blocks following.
|
||||
if block_out_count[succ] == 1 {
|
||||
let range = block_succ_range[succ];
|
||||
// check if the one succ is a real CFG edge (vs.
|
||||
// implicit return succ).
|
||||
if range.1 - range.0 > 0 {
|
||||
debug_assert!(range.1 - range.0 == 1);
|
||||
let (succ_edge_inst, succ_succ) = block_succs[range.0];
|
||||
ret.push((
|
||||
edge_inst,
|
||||
LoweredBlock::OrigAndEdge {
|
||||
block: succ,
|
||||
edge_inst: succ_edge_inst,
|
||||
succ: succ_succ,
|
||||
},
|
||||
));
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
} else {
|
||||
ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
|
||||
}
|
||||
}
|
||||
}
|
||||
let end_idx = ret.len();
|
||||
(start_idx, end_idx)
|
||||
};
|
||||
|
||||
// Build the explicit LoweredBlock-to-LoweredBlock successors list.
|
||||
let mut lowered_succs = vec![];
|
||||
let mut lowered_succ_indices = vec![];
|
||||
|
||||
// Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
|
||||
// explicit stack so we don't overflow the real stack with a deep DFS.
|
||||
#[derive(Debug)]
|
||||
struct StackEntry {
|
||||
this: LoweredBlock,
|
||||
succs: (usize, usize), // range in lowered_succs
|
||||
cur_succ: usize, // index in lowered_succs
|
||||
}
|
||||
|
||||
let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
|
||||
let mut visited = FxHashSet::default();
|
||||
let mut postorder = vec![];
|
||||
if let Some(entry) = f.layout.entry_block() {
|
||||
// FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
|
||||
// to not special-case the entry block here.
|
||||
let block = LoweredBlock::Orig { block: entry };
|
||||
visited.insert(block);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, block);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: block,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
|
||||
let mut deferred_last = None;
|
||||
while !stack.is_empty() {
|
||||
let stack_entry = stack.last_mut().unwrap();
|
||||
let range = stack_entry.succs;
|
||||
if stack_entry.cur_succ == range.0 {
|
||||
let orig_block = stack_entry.this.orig_block();
|
||||
if orig_block.is_some() && orig_block == fallthrough_return_block {
|
||||
deferred_last = Some((stack_entry.this, range));
|
||||
} else {
|
||||
postorder.push((stack_entry.this, range));
|
||||
}
|
||||
stack.pop();
|
||||
} else {
|
||||
// Heuristic: chase the children in reverse. This puts the first
|
||||
// successor block first in RPO, all other things being equal,
|
||||
// which tends to prioritize loop backedges over out-edges,
|
||||
// putting the edge-block closer to the loop body and minimizing
|
||||
// live-ranges in linear instruction space.
|
||||
let next = lowered_succs[stack_entry.cur_succ - 1].1;
|
||||
stack_entry.cur_succ -= 1;
|
||||
if visited.contains(&next) {
|
||||
continue;
|
||||
}
|
||||
visited.insert(next);
|
||||
let range = compute_lowered_succs(&mut lowered_succs, next);
|
||||
lowered_succ_indices.resize(lowered_succs.len(), 0);
|
||||
stack.push(StackEntry {
|
||||
this: next,
|
||||
succs: range,
|
||||
cur_succ: range.1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
postorder.reverse();
|
||||
let mut rpo = postorder;
|
||||
if let Some(d) = deferred_last {
|
||||
rpo.push(d);
|
||||
}
|
||||
|
||||
// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
|
||||
let mut lowered_order = vec![];
|
||||
let mut lowered_succ_ranges = vec![];
|
||||
let mut lb_to_bindex = FxHashMap::default();
|
||||
for (block, succ_range) in rpo.into_iter() {
|
||||
lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
|
||||
lowered_order.push(block);
|
||||
lowered_succ_ranges.push(succ_range);
|
||||
}
|
||||
|
||||
let lowered_succ_indices = lowered_succs
|
||||
.iter()
|
||||
.map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
|
||||
.collect();
|
||||
|
||||
let mut orig_map = SecondaryMap::with_default(None);
|
||||
for (i, lb) in lowered_order.iter().enumerate() {
|
||||
let i = i as BlockIndex;
|
||||
if let Some(b) = lb.orig_block() {
|
||||
orig_map[b] = Some(i);
|
||||
}
|
||||
}
|
||||
|
||||
let result = BlockLoweringOrder {
|
||||
lowered_order,
|
||||
lowered_succs,
|
||||
lowered_succ_indices,
|
||||
lowered_succ_ranges,
|
||||
orig_map,
|
||||
};
|
||||
debug!("BlockLoweringOrder: {:?}", result);
|
||||
result
|
||||
}
|
||||
|
||||
/// Get the lowered order of blocks.
|
||||
pub fn lowered_order(&self) -> &[LoweredBlock] {
|
||||
&self.lowered_order[..]
|
||||
}
|
||||
|
||||
/// Get the successors for a lowered block, by index in `lowered_order()`'s
|
||||
/// returned slice. Each successor is paired with the edge-instruction
|
||||
/// (branch) corresponding to this edge.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succs[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the successor indices for a lowered block.
|
||||
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
|
||||
let range = self.lowered_succ_ranges[block as usize];
|
||||
&self.lowered_succ_indices[range.0..range.1]
|
||||
}
|
||||
|
||||
/// Get the lowered block index containing a CLIF block, if any. (May not be
|
||||
/// present if the original CLIF block was unreachable.)
|
||||
pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
|
||||
self.orig_map[bb]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
|
||||
use crate::isa::CallConv;
|
||||
|
||||
fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
|
||||
assert!(n_blocks > 0);
|
||||
|
||||
let name = ExternalName::testcase("test0");
|
||||
let mut sig = Signature::new(CallConv::SystemV);
|
||||
sig.params.push(AbiParam::new(I32));
|
||||
let mut func = Function::with_name_signature(name, sig);
|
||||
let blocks = (0..n_blocks)
|
||||
.map(|i| {
|
||||
let bb = func.dfg.make_block();
|
||||
assert!(bb.as_u32() == i as u32);
|
||||
bb
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let arg0 = func.dfg.append_block_param(blocks[0], I32);
|
||||
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
|
||||
let mut edge = 0;
|
||||
for i in 0..n_blocks {
|
||||
pos.insert_block(blocks[i]);
|
||||
let mut succs = vec![];
|
||||
while edge < edges.len() && edges[edge].0 == i {
|
||||
succs.push(edges[edge].1);
|
||||
edge += 1;
|
||||
}
|
||||
if succs.len() == 0 {
|
||||
pos.ins().return_(&[arg0]);
|
||||
} else if succs.len() == 1 {
|
||||
pos.ins().jump(blocks[succs[0]], &[]);
|
||||
} else if succs.len() == 2 {
|
||||
pos.ins().brnz(arg0, blocks[succs[0]], &[]);
|
||||
pos.ins().jump(blocks[succs[1]], &[]);
|
||||
} else {
|
||||
panic!("Too many successors");
|
||||
}
|
||||
}
|
||||
|
||||
func
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_diamond() {
|
||||
let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 6);
|
||||
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
|
||||
assert!(order.lowered_order[2].orig_block().is_none());
|
||||
assert!(order.lowered_order[2].in_edge().is_none());
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[3].out_edge().is_none());
|
||||
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
|
||||
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[5].in_edge().is_none());
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_blockorder_critedge() {
|
||||
// 0
|
||||
// / \
|
||||
// 1 2
|
||||
// / \ \
|
||||
// 3 4 |
|
||||
// |\ _|____|
|
||||
// | \/ |
|
||||
// | /\ |
|
||||
// 5 6
|
||||
//
|
||||
// (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
|
||||
//
|
||||
let func = build_test_func(
|
||||
7,
|
||||
&[
|
||||
(0, 1),
|
||||
(0, 2),
|
||||
(1, 3),
|
||||
(1, 4),
|
||||
(2, 5),
|
||||
(3, 5),
|
||||
(3, 6),
|
||||
(4, 6),
|
||||
],
|
||||
);
|
||||
let order = BlockLoweringOrder::new(&func);
|
||||
|
||||
assert_eq!(order.lowered_order.len(), 11);
|
||||
println!("ordered = {:?}", order.lowered_order);
|
||||
|
||||
// block 0
|
||||
assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
|
||||
assert!(order.lowered_order[0].in_edge().is_none());
|
||||
assert!(order.lowered_order[0].out_edge().is_none());
|
||||
|
||||
// edge 0->1 + block 1
|
||||
assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
|
||||
assert!(order.lowered_order[1].out_edge().is_none());
|
||||
|
||||
// edge 1->3 + block 3
|
||||
assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
|
||||
assert!(order.lowered_order[2].out_edge().is_none());
|
||||
|
||||
// edge 3->5
|
||||
assert!(order.lowered_order[3].orig_block().is_none());
|
||||
assert!(order.lowered_order[3].in_edge().is_none());
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// edge 3->6
|
||||
assert!(order.lowered_order[4].orig_block().is_none());
|
||||
assert!(order.lowered_order[4].in_edge().is_none());
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
|
||||
assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// edge 1->4 + block 4
|
||||
assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
|
||||
assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
|
||||
assert!(order.lowered_order[5].out_edge().is_none());
|
||||
|
||||
// edge 4->6
|
||||
assert!(order.lowered_order[6].orig_block().is_none());
|
||||
assert!(order.lowered_order[6].in_edge().is_none());
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
|
||||
assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
|
||||
|
||||
// block 6
|
||||
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
|
||||
assert!(order.lowered_order[7].in_edge().is_none());
|
||||
assert!(order.lowered_order[7].out_edge().is_none());
|
||||
|
||||
// edge 0->2 + block 2
|
||||
assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
|
||||
assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
|
||||
assert!(order.lowered_order[8].out_edge().is_none());
|
||||
|
||||
// edge 2->5
|
||||
assert!(order.lowered_order[9].orig_block().is_none());
|
||||
assert!(order.lowered_order[9].in_edge().is_none());
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
|
||||
assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
|
||||
|
||||
// block 5
|
||||
assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
|
||||
assert!(order.lowered_order[10].in_edge().is_none());
|
||||
assert!(order.lowered_order[10].out_edge().is_none());
|
||||
}
|
||||
/// Compute the final block order.
|
||||
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
|
||||
let mut rpo = BlockRPO::new(vcode);
|
||||
rpo.visit(vcode, vcode.entry());
|
||||
rpo.rpo()
|
||||
}
|
||||
|
|
Diff between files not shown because of its large size
|
@ -6,11 +6,11 @@ use crate::settings;
|
|||
use crate::timing;
|
||||
|
||||
use log::debug;
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
|
||||
use regalloc::{allocate_registers, RegAllocAlgorithm};
|
||||
|
||||
/// Compile the given function down to VCode with allocated registers, ready
|
||||
/// for binary emission.
|
||||
pub fn compile<B: LowerBackend + MachBackend>(
|
||||
pub fn compile<B: LowerBackend>(
|
||||
f: &Function,
|
||||
b: &B,
|
||||
abi: Box<dyn ABIBody<I = B::MInst>>,
|
||||
|
@ -18,46 +18,29 @@ pub fn compile<B: LowerBackend + MachBackend>(
|
|||
where
|
||||
B::MInst: ShowWithRRU,
|
||||
{
|
||||
// Compute lowered block order.
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
// Build the lowering context.
|
||||
let lower = Lower::new(f, abi, block_order)?;
|
||||
// Lower the IR.
|
||||
let mut vcode = lower.lower(b)?;
|
||||
// This lowers the CL IR.
|
||||
let mut vcode = Lower::new(f, abi)?.lower(b)?;
|
||||
|
||||
debug!(
|
||||
"vcode from lowering: \n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
);
|
||||
let universe = &B::MInst::reg_universe(vcode.flags());
|
||||
|
||||
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
|
||||
|
||||
// Perform register allocation.
|
||||
let (run_checker, algorithm) = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
|
||||
settings::Regalloc::BacktrackingChecked => {
|
||||
(true, Algorithm::Backtracking(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScan => {
|
||||
(false, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScanChecked => {
|
||||
(true, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
let algorithm = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => RegAllocAlgorithm::Backtracking,
|
||||
settings::Regalloc::BacktrackingChecked => RegAllocAlgorithm::BacktrackingChecked,
|
||||
settings::Regalloc::ExperimentalLinearScan => RegAllocAlgorithm::LinearScan,
|
||||
};
|
||||
|
||||
let result = {
|
||||
let _tt = timing::regalloc();
|
||||
allocate_registers_with_opts(
|
||||
&mut vcode,
|
||||
b.reg_universe(),
|
||||
Options {
|
||||
run_checker,
|
||||
algorithm,
|
||||
},
|
||||
allocate_registers(
|
||||
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
|
||||
)
|
||||
.map_err(|err| {
|
||||
debug!(
|
||||
"Register allocation error for vcode\n{}\nError: {:?}",
|
||||
vcode.show_rru(Some(b.reg_universe())),
|
||||
vcode.show_rru(Some(universe)),
|
||||
err
|
||||
);
|
||||
err
|
||||
|
@ -69,9 +52,14 @@ where
|
|||
// all at once. This also inserts prologues/epilogues.
|
||||
vcode.replace_insns_from_regalloc(result);
|
||||
|
||||
vcode.remove_redundant_branches();
|
||||
|
||||
// Do final passes over code to finalize branches.
|
||||
vcode.finalize_branches();
|
||||
|
||||
debug!(
|
||||
"vcode after regalloc: final version:\n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
vcode.show_rru(Some(universe))
|
||||
);
|
||||
|
||||
Ok(vcode)
|
||||
|
|
Diff between files not shown because of its large size
|
@ -109,7 +109,6 @@ use regalloc::RegUsageCollector;
|
|||
use regalloc::{
|
||||
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use std::string::String;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
|
@ -125,8 +124,8 @@ pub mod abi;
|
|||
pub use abi::*;
|
||||
pub mod pretty_print;
|
||||
pub use pretty_print::*;
|
||||
pub mod buffer;
|
||||
pub use buffer::*;
|
||||
pub mod sections;
|
||||
pub use sections::*;
|
||||
pub mod adapter;
|
||||
pub use adapter::*;
|
||||
|
||||
|
@ -138,7 +137,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Map virtual registers to physical registers using the given virt->phys
|
||||
/// maps corresponding to the program points prior to, and after, this instruction.
|
||||
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
|
||||
fn map_regs(&mut self, maps: &RegUsageMapper);
|
||||
|
||||
/// If this is a simple move, return the (source, destination) tuple of registers.
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
|
||||
|
@ -153,9 +152,6 @@ pub trait MachInst: Clone + Debug {
|
|||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
/// Generate a constant into a reg.
|
||||
fn gen_constant(to_reg: Writable<Reg>, value: u64, ty: Type) -> SmallVec<[Self; 4]>;
|
||||
|
||||
/// Generate a zero-length no-op.
|
||||
fn gen_zero_len_nop() -> Self;
|
||||
|
||||
|
@ -170,7 +166,7 @@ pub trait MachInst: Clone + Debug {
|
|||
|
||||
/// Generate a jump to another target. Used during lowering of
|
||||
/// control flow.
|
||||
fn gen_jump(target: MachLabel) -> Self;
|
||||
fn gen_jump(target: BlockIndex) -> Self;
|
||||
|
||||
/// Generate a NOP. The `preferred_size` parameter allows the caller to
|
||||
/// request a NOP of that size, or as close to it as possible. The machine
|
||||
|
@ -179,6 +175,17 @@ pub trait MachInst: Clone + Debug {
|
|||
/// the instruction must have a nonzero size.
|
||||
fn gen_nop(preferred_size: usize) -> Self;
|
||||
|
||||
/// Rewrite block targets using the block-target map.
|
||||
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
|
||||
|
||||
/// Finalize branches once the block order (fallthrough) is known.
|
||||
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
|
||||
|
||||
/// Update instruction once block offsets are known. These offsets are
|
||||
/// relative to the beginning of the function. `targets` is indexed by
|
||||
/// BlockIndex.
|
||||
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
|
||||
|
||||
/// Get the register universe for this backend.
|
||||
fn reg_universe(flags: &Flags) -> RealRegUniverse;
|
||||
|
||||
|
@ -187,54 +194,6 @@ pub trait MachInst: Clone + Debug {
|
|||
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
|
||||
offset
|
||||
}
|
||||
|
||||
/// What is the worst-case instruction size emitted by this instruction type?
|
||||
fn worst_case_size() -> CodeOffset;
|
||||
|
||||
/// A label-use kind: a type that describes the types of label references that
|
||||
/// can occur in an instruction.
|
||||
type LabelUse: MachInstLabelUse;
|
||||
}
|
||||
|
||||
/// A descriptor of a label reference (use) in an instruction set.
|
||||
pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
|
||||
/// Required alignment for any veneer. Usually the required instruction
|
||||
/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
|
||||
const ALIGN: CodeOffset;
|
||||
|
||||
/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
|
||||
/// label-reference fixup at offset `x` is valid if the label resolves to `x
|
||||
/// + 1024`.
|
||||
fn max_pos_range(self) -> CodeOffset;
|
||||
/// What is the maximum PC-relative range (negative)? This is the absolute
|
||||
/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
|
||||
/// valid if the label resolves to `x - 1024`.
|
||||
fn max_neg_range(self) -> CodeOffset;
|
||||
/// What is the size of code-buffer slice this label-use needs to patch in
|
||||
/// the label's value?
|
||||
fn patch_size(self) -> CodeOffset;
|
||||
/// Perform a code-patch, given the offset into the buffer of this label use
|
||||
/// and the offset into the buffer of the label's definition.
|
||||
/// It is guaranteed that, given `delta = label_offset - offset`, we will
|
||||
/// have `delta >= -self.max_neg_range()` and `delta <=
|
||||
/// self.max_pos_range()`.
|
||||
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
|
||||
/// Can the label-use be patched to a veneer that supports a longer range?
|
||||
/// Usually valid for jumps (a short-range jump can jump to a longer-range
|
||||
/// jump), but not for e.g. constant pool references, because the constant
|
||||
/// load would require different code (one more level of indirection).
|
||||
fn supports_veneer(self) -> bool;
|
||||
/// How many bytes are needed for a veneer?
|
||||
fn veneer_size(self) -> CodeOffset;
|
||||
/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
|
||||
/// bytes long at offset `veneer_offset` in the buffer. The original
|
||||
/// label-use will be patched to refer to this veneer's offset. A new
|
||||
/// (offset, LabelUse) is returned that allows the veneer to use the actual
|
||||
/// label. For veneers to work properly, it is expected that the new veneer
|
||||
/// has a larger range; on most platforms this probably means either a
|
||||
/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
|
||||
/// stage, a jump that supports a full 32-bit range, for example.
|
||||
fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
|
||||
}
|
||||
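A minimal sketch of the range check this interface enables (hypothetical reach values, not a real ISA): given a use at `use_offset` and a label at `label_offset`, the buffer can decide whether the reference is directly representable or whether a veneer is required.

/// Returns true if a label reference at `use_offset` can directly reach
/// `label_offset`, given the positive/negative reach of the label-use kind.
/// Hypothetical helper, mirroring the max_pos_range/max_neg_range contract above.
fn label_in_range(use_offset: u32, label_offset: u32, max_pos: u32, max_neg: u32) -> bool {
    if label_offset >= use_offset {
        label_offset - use_offset <= max_pos
    } else {
        use_offset - label_offset <= max_neg
    }
}

fn main() {
    // A hypothetical branch form that reaches 1 MiB in either direction.
    let (max_pos, max_neg) = (1 << 20, 1 << 20);
    assert!(label_in_range(0x1000, 0x2000, max_pos, max_neg));      // forward, in range
    assert!(!label_in_range(0x1000, 0x20_0000, max_pos, max_neg));  // too far: emit a veneer
}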
|
||||
/// Describes a block terminator (not call) in the vcode, when its branches
|
||||
|
@ -246,26 +205,24 @@ pub enum MachTerminator<'a> {
|
|||
/// A return instruction.
|
||||
Ret,
|
||||
/// An unconditional branch to another block.
|
||||
Uncond(MachLabel),
|
||||
Uncond(BlockIndex),
|
||||
/// A conditional branch to one of two other blocks.
|
||||
Cond(MachLabel, MachLabel),
|
||||
Cond(BlockIndex, BlockIndex),
|
||||
/// An indirect branch with known possible targets.
|
||||
Indirect(&'a [MachLabel]),
|
||||
Indirect(&'a [BlockIndex]),
|
||||
}
|
||||
|
||||
/// A trait describing the ability to encode a MachInst into binary machine code.
|
||||
pub trait MachInstEmit: MachInst {
|
||||
/// Persistent state carried across `emit` invocations.
|
||||
type State: Default + Clone + Debug;
|
||||
pub trait MachInstEmit<O: MachSectionOutput> {
|
||||
/// Emit the instruction.
|
||||
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
|
||||
fn emit(&self, code: &mut O, flags: &Flags);
|
||||
}
|
||||
|
||||
/// The result of a `MachBackend::compile_function()` call. Contains machine
|
||||
/// code (as bytes) and a disassembly, if requested.
|
||||
pub struct MachCompileResult {
|
||||
/// Machine code.
|
||||
pub buffer: MachBufferFinalized,
|
||||
pub sections: MachSections,
|
||||
/// Size of stack frame, in bytes.
|
||||
pub frame_size: u32,
|
||||
/// Disassembly, if requested.
|
||||
|
@ -275,7 +232,7 @@ pub struct MachCompileResult {
|
|||
impl MachCompileResult {
|
||||
/// Get a `CodeInfo` describing section sizes from this compilation result.
|
||||
pub fn code_info(&self) -> CodeInfo {
|
||||
let code_size = self.buffer.total_size();
|
||||
let code_size = self.sections.total_size();
|
||||
CodeInfo {
|
||||
code_size,
|
||||
jumptables_size: 0,
|
||||
|
@ -305,13 +262,17 @@ pub trait MachBackend {
|
|||
fn name(&self) -> &'static str;
|
||||
|
||||
/// Return the register universe for this backend.
|
||||
fn reg_universe(&self) -> &RealRegUniverse;
|
||||
fn reg_universe(&self) -> RealRegUniverse;
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
/// Condition that will be true when an IaddIfcout overflows.
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC;
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
|
||||
/// Machine-specific condcode info needed by TargetIsa.
|
||||
/// Condition that will be true when an IsubIfcout overflows.
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC;
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
// TODO: this is what x86 specifies. Is this right for arm64?
|
||||
IntCC::UnsignedLessThan
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,460 @@
|
|||
//! In-memory representation of compiled machine code, in multiple sections
|
||||
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
|
||||
//! simultaneously, so we buffer the result in memory and hand off to the
|
||||
//! caller at the end of compilation.
|
||||
|
||||
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// A collection of sections with defined start-offsets.
|
||||
pub struct MachSections {
|
||||
/// Sections, in offset order.
|
||||
pub sections: Vec<MachSection>,
|
||||
}
|
||||
|
||||
impl MachSections {
|
||||
/// New, empty set of sections.
|
||||
pub fn new() -> MachSections {
|
||||
MachSections { sections: vec![] }
|
||||
}
|
||||
|
||||
/// Add a section with a known offset and size. Returns the index.
|
||||
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
|
||||
let idx = self.sections.len();
|
||||
self.sections.push(MachSection::new(start, length));
|
||||
idx
|
||||
}
|
||||
|
||||
/// Mutably borrow the given section by index.
|
||||
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
|
||||
&mut self.sections[idx]
|
||||
}
|
||||
|
||||
/// Get mutable borrows of two sections simultaneously. Used during
|
||||
/// instruction emission to provide references to the .text and .rodata
|
||||
/// (constant pool) sections.
|
||||
pub fn two_sections<'a>(
|
||||
&'a mut self,
|
||||
idx1: usize,
|
||||
idx2: usize,
|
||||
) -> (&'a mut MachSection, &'a mut MachSection) {
|
||||
assert!(idx1 < idx2);
|
||||
assert!(idx1 < self.sections.len());
|
||||
assert!(idx2 < self.sections.len());
|
||||
let (first, rest) = self.sections.split_at_mut(idx2);
|
||||
(&mut first[idx1], &mut rest[0])
|
||||
}
|
||||
|
||||
/// Emit this set of sections to a set of sinks for the code,
|
||||
/// relocations, traps, and stackmap.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
// N.B.: we emit every section into the .text section as far as
|
||||
// the `CodeSink` is concerned; we do not bother to segregate
|
||||
// the contents into the actual program text, the jumptable and the
|
||||
// rodata (constant pool). This allows us to generate code assuming
|
||||
// that these will not be relocated relative to each other, and avoids
|
||||
// having to designate each section as belonging in one of the three
|
||||
// fixed categories defined by `CodeSink`. If this becomes a problem
|
||||
// later (e.g. because of memory permissions or similar), we can
|
||||
// add this designation and segregate the output; take care, however,
|
||||
// to add the appropriate relocations in this case.
|
||||
|
||||
for section in &self.sections {
|
||||
if section.data.len() > 0 {
|
||||
while sink.offset() < section.start_offset {
|
||||
sink.put1(0);
|
||||
}
|
||||
section.emit(sink);
|
||||
}
|
||||
}
|
||||
sink.begin_jumptables();
|
||||
sink.begin_rodata();
|
||||
sink.end_codegen();
|
||||
}
|
||||
|
||||
/// Get a list of source location mapping tuples in sorted-by-start-offset order.
|
||||
pub fn get_srclocs_sorted<'a>(&'a self) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs::new(&self.sections)
|
||||
}
|
||||
|
||||
/// Get the total required size for these sections.
|
||||
pub fn total_size(&self) -> CodeOffset {
|
||||
if self.sections.len() == 0 {
|
||||
0
|
||||
} else {
|
||||
// Find the last non-empty section.
|
||||
self.sections
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| s.data.len() > 0)
|
||||
.map(|s| s.cur_offset_from_start())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the srclocs in each section.
|
||||
/// Returns MachSrcLocs in an order sorted by start location.
|
||||
pub struct MachSectionsSrcLocs<'a> {
|
||||
sections: &'a [MachSection],
|
||||
cur_section: usize,
|
||||
cur_srcloc: usize,
|
||||
// For validation:
|
||||
last_offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl<'a> MachSectionsSrcLocs<'a> {
|
||||
fn new(sections: &'a [MachSection]) -> MachSectionsSrcLocs<'a> {
|
||||
MachSectionsSrcLocs {
|
||||
sections,
|
||||
cur_section: 0,
|
||||
cur_srcloc: 0,
|
||||
last_offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MachSectionsSrcLocs<'a> {
|
||||
type Item = &'a MachSrcLoc;
|
||||
|
||||
fn next(&mut self) -> Option<&'a MachSrcLoc> {
|
||||
// We simply iterate through sections and srcloc records in order. This produces a
|
||||
// sorted order naturally because sections are in starting-offset-order, and srclocs
|
||||
// are produced as a section is emitted into, so are in order as well.
|
||||
|
||||
// If we're out of sections, we're done.
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Otherwise, make sure we have a srcloc in the current section left to return, and
|
||||
// advance to the next section if not. Done if we run out of sections.
|
||||
while self.cur_srcloc >= self.sections[self.cur_section].srclocs.len() {
|
||||
self.cur_srcloc = 0;
|
||||
self.cur_section += 1;
|
||||
if self.cur_section >= self.sections.len() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let loc = &self.sections[self.cur_section].srclocs[self.cur_srcloc];
|
||||
self.cur_srcloc += 1;
|
||||
debug_assert!(loc.start >= self.last_offset);
|
||||
self.last_offset = loc.start;
|
||||
Some(loc)
|
||||
}
|
||||
}
|
||||
|
||||
/// An abstraction over MachSection and MachSectionSize: some
|
||||
/// receiver of section data.
|
||||
pub trait MachSectionOutput {
|
||||
/// Get the current offset from the start of all sections.
|
||||
fn cur_offset_from_start(&self) -> CodeOffset;
|
||||
|
||||
/// Get the start offset of this section.
|
||||
fn start_offset(&self) -> CodeOffset;
|
||||
|
||||
/// Add 1 byte to the section.
|
||||
fn put1(&mut self, _: u8);
|
||||
|
||||
/// Add 2 bytes to the section.
|
||||
fn put2(&mut self, value: u16) {
|
||||
let [b0, b1] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
}
|
||||
|
||||
/// Add 4 bytes to the section.
|
||||
fn put4(&mut self, value: u32) {
|
||||
let [b0, b1, b2, b3] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
}
|
||||
|
||||
/// Add 8 bytes to the section.
|
||||
fn put8(&mut self, value: u64) {
|
||||
let [b0, b1, b2, b3, b4, b5, b6, b7] = value.to_le_bytes();
|
||||
self.put1(b0);
|
||||
self.put1(b1);
|
||||
self.put1(b2);
|
||||
self.put1(b3);
|
||||
self.put1(b4);
|
||||
self.put1(b5);
|
||||
self.put1(b6);
|
||||
self.put1(b7);
|
||||
}
|
||||
|
||||
/// Add a slice of bytes to the section.
|
||||
fn put_data(&mut self, data: &[u8]);
|
||||
|
||||
/// Add a relocation at the current offset.
|
||||
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
|
||||
|
||||
/// Add a trap record at the current offset.
|
||||
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
|
||||
|
||||
/// Add a call return address record at the current offset.
|
||||
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
|
||||
|
||||
/// Start the output for the given source-location at the current offset.
|
||||
fn start_srcloc(&mut self, loc: SourceLoc);
|
||||
|
||||
/// End the output for the previously-given source-location at the current offset.
|
||||
fn end_srcloc(&mut self);
|
||||
|
||||
/// Align up to the given alignment.
|
||||
fn align_to(&mut self, align_to: CodeOffset) {
|
||||
assert!(align_to.is_power_of_two());
|
||||
while self.cur_offset_from_start() & (align_to - 1) != 0 {
|
||||
self.put1(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
|
||||
/// Multiple sections may be created with known start offsets in advance; the
|
||||
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
|
||||
/// once, after computing the length of the code, so that constant references
|
||||
/// can use known offsets as instructions are emitted.
|
||||
pub struct MachSection {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The limit of this section, defined by the start of the next section.
|
||||
pub length_limit: CodeOffset,
|
||||
/// The section contents, as raw bytes.
|
||||
pub data: Vec<u8>,
|
||||
/// Any relocations referring to this section.
|
||||
pub relocs: Vec<MachReloc>,
|
||||
/// Any trap records referring to this section.
|
||||
pub traps: Vec<MachTrap>,
|
||||
/// Any call site records referring to this section.
|
||||
pub call_sites: Vec<MachCallSite>,
|
||||
/// Any source location mappings referring to this section.
|
||||
pub srclocs: Vec<MachSrcLoc>,
|
||||
/// The current source location in progress (after `start_srcloc()` and before `end_srcloc()`).
|
||||
/// This is a (start_offset, src_loc) tuple.
|
||||
pub cur_srcloc: Option<(CodeOffset, SourceLoc)>,
|
||||
}
|
||||
|
||||
impl MachSection {
|
||||
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
|
||||
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
|
||||
MachSection {
|
||||
start_offset,
|
||||
length_limit,
|
||||
data: vec![],
|
||||
relocs: vec![],
|
||||
traps: vec![],
|
||||
call_sites: vec![],
|
||||
srclocs: vec![],
|
||||
cur_srcloc: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit this section to the CodeSink and other associated sinks. The
|
||||
/// current offset of the CodeSink must match the starting offset of this
|
||||
/// section.
|
||||
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||
assert!(sink.offset() == self.start_offset);
|
||||
|
||||
let mut next_reloc = 0;
|
||||
let mut next_trap = 0;
|
||||
let mut next_call_site = 0;
|
||||
for (idx, byte) in self.data.iter().enumerate() {
|
||||
if next_reloc < self.relocs.len() {
|
||||
let reloc = &self.relocs[next_reloc];
|
||||
if reloc.offset == idx as CodeOffset {
|
||||
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
|
||||
next_reloc += 1;
|
||||
}
|
||||
}
|
||||
if next_trap < self.traps.len() {
|
||||
let trap = &self.traps[next_trap];
|
||||
if trap.offset == idx as CodeOffset {
|
||||
sink.trap(trap.code, trap.srcloc);
|
||||
next_trap += 1;
|
||||
}
|
||||
}
|
||||
if next_call_site < self.call_sites.len() {
|
||||
let call_site = &self.call_sites[next_call_site];
|
||||
if call_site.ret_addr == idx as CodeOffset {
|
||||
sink.add_call_site(call_site.opcode, call_site.srcloc);
|
||||
next_call_site += 1;
|
||||
}
|
||||
}
|
||||
sink.put1(*byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSection {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
self.start_offset + self.data.len() as CodeOffset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, value: u8) {
|
||||
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
|
||||
self.data.push(value);
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
|
||||
self.data.extend_from_slice(data);
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
|
||||
let name = name.clone();
|
||||
self.relocs.push(MachReloc {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
kind,
|
||||
name,
|
||||
addend,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
|
||||
self.traps.push(MachTrap {
|
||||
offset: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
code,
|
||||
});
|
||||
}
|
||||
|
||||
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
|
||||
self.call_sites.push(MachCallSite {
|
||||
ret_addr: self.data.len() as CodeOffset,
|
||||
srcloc,
|
||||
opcode,
|
||||
});
|
||||
}
|
||||
|
||||
fn start_srcloc(&mut self, loc: SourceLoc) {
|
||||
self.cur_srcloc = Some((self.cur_offset_from_start(), loc));
|
||||
}
|
||||
|
||||
fn end_srcloc(&mut self) {
|
||||
let (start, loc) = self
|
||||
.cur_srcloc
|
||||
.take()
|
||||
.expect("end_srcloc() called without start_srcloc()");
|
||||
let end = self.cur_offset_from_start();
|
||||
// Skip zero-length extends.
|
||||
debug_assert!(end >= start);
|
||||
if end > start {
|
||||
self.srclocs.push(MachSrcLoc { start, end, loc });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A MachSectionOutput implementation that records only size.
|
||||
pub struct MachSectionSize {
|
||||
/// The starting offset of this section.
|
||||
pub start_offset: CodeOffset,
|
||||
/// The current offset of this section.
|
||||
pub offset: CodeOffset,
|
||||
}
|
||||
|
||||
impl MachSectionSize {
|
||||
/// Create a new size-counting dummy section.
|
||||
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
|
||||
MachSectionSize {
|
||||
start_offset,
|
||||
offset: start_offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the size this section would take if emitted with a real sink.
|
||||
pub fn size(&self) -> CodeOffset {
|
||||
self.offset - self.start_offset
|
||||
}
|
||||
}
|
||||
|
||||
impl MachSectionOutput for MachSectionSize {
|
||||
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||
// All size-counting sections conceptually start at offset 0; this doesn't
|
||||
// matter when counting code size.
|
||||
self.offset
|
||||
}
|
||||
|
||||
fn start_offset(&self) -> CodeOffset {
|
||||
self.start_offset
|
||||
}
|
||||
|
||||
fn put1(&mut self, _: u8) {
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
fn put_data(&mut self, data: &[u8]) {
|
||||
self.offset += data.len() as CodeOffset;
|
||||
}
|
||||
|
||||
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
|
||||
|
||||
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
|
||||
|
||||
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
|
||||
|
||||
fn start_srcloc(&mut self, _: SourceLoc) {}
|
||||
|
||||
fn end_srcloc(&mut self) {}
|
||||
}
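// A hypothetical usage sketch (not part of the original file) showing how the
// size-only sink composes with the `MachSectionOutput` default methods: `put4`
// expands into four little-endian `put1` calls, and `align_to` pads with zero
// bytes up to the requested power-of-two boundary. The name `_size_sink_sketch`
// is an assumption made for this example only.
fn _size_sink_sketch() {
    let mut size = MachSectionSize::new(0);
    size.put4(0xDEAD_BEEF);
    assert_eq!(size.size(), 4); // four bytes counted via `put1`
    size.align_to(8);
    assert_eq!(size.size(), 8); // padded out to the next 8-byte boundary
}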
|
||||
|
||||
/// A relocation resulting from a compilation.
|
||||
pub struct MachReloc {
|
||||
/// The offset at which the relocation applies, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The kind of relocation.
|
||||
pub kind: Reloc,
|
||||
/// The external symbol / name to which this relocation refers.
|
||||
pub name: ExternalName,
|
||||
/// The addend to add to the symbol value.
|
||||
pub addend: i64,
|
||||
}
|
||||
|
||||
/// A trap record resulting from a compilation.
|
||||
pub struct MachTrap {
|
||||
/// The offset at which the trap instruction occurs, *relative to the
|
||||
/// containing section*.
|
||||
pub offset: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The trap code.
|
||||
pub code: TrapCode,
|
||||
}
|
||||
|
||||
/// A call site record resulting from a compilation.
|
||||
pub struct MachCallSite {
|
||||
/// The offset of the call's return address, *relative to the containing section*.
|
||||
pub ret_addr: CodeOffset,
|
||||
/// The original source location.
|
||||
pub srcloc: SourceLoc,
|
||||
/// The call's opcode.
|
||||
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// A source-location mapping resulting from a compilation.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MachSrcLoc {
|
||||
/// The start of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub start: CodeOffset,
|
||||
/// The end of the region of code corresponding to a source location.
|
||||
/// This is relative to the start of the function, not to the start of the
|
||||
/// section.
|
||||
pub end: CodeOffset,
|
||||
/// The source location.
|
||||
pub loc: SourceLoc,
|
||||
}
|
|
@ -17,7 +17,9 @@
|
|||
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
||||
//! backend pipeline.
|
||||
|
||||
use crate::ir::{self, SourceLoc};
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir;
|
||||
use crate::ir::SourceLoc;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
|
@ -28,7 +30,9 @@ use regalloc::{
|
|||
};
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{borrow::Cow, vec::Vec};
|
||||
use alloc::vec::Vec;
|
||||
use log::debug;
|
||||
use smallvec::SmallVec;
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::string::String;
|
||||
|
@ -40,8 +44,8 @@ pub type BlockIndex = u32;
|
|||
|
||||
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
|
||||
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
|
||||
pub trait VCodeInst: MachInst + MachInstEmit {}
|
||||
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
|
||||
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
|
||||
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
|
||||
|
||||
/// A function in "VCode" (virtualized-register code) form, after lowering.
|
||||
/// This is essentially a standard CFG of basic blocks, where each basic block
|
||||
|
@ -75,10 +79,25 @@ pub struct VCode<I: VCodeInst> {
|
|||
/// Block successor lists, concatenated into one Vec. The `block_succ_range`
|
||||
/// list of tuples above gives (start, end) ranges within this list that
|
||||
/// correspond to each basic block's successors.
|
||||
block_succs: Vec<BlockIx>,
|
||||
block_succs: Vec<BlockIndex>,
|
||||
|
||||
/// Block-order information.
|
||||
block_order: BlockLoweringOrder,
|
||||
/// Block indices by IR block.
|
||||
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
|
||||
|
||||
/// IR block for each VCode Block. The length of this Vec will likely be
|
||||
/// less than the total number of Blocks, because new Blocks (for edge
|
||||
/// splits, for example) are appended during lowering.
|
||||
bb_by_block: Vec<ir::Block>,
|
||||
|
||||
/// Order of block IDs in final generated code.
|
||||
final_block_order: Vec<BlockIndex>,
|
||||
|
||||
/// Final block offsets. Computed during branch finalization and used
|
||||
/// during emission.
|
||||
final_block_offsets: Vec<CodeOffset>,
|
||||
|
||||
/// Size of code, accounting for block layout / alignment.
|
||||
code_size: CodeOffset,
|
||||
|
||||
/// ABI object.
|
||||
abi: Box<dyn ABIBody<I = I>>,
|
||||
|
@ -102,8 +121,12 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
/// In-progress VCode.
|
||||
vcode: VCode<I>,
|
||||
|
||||
/// Index of the last block-start in the vcode.
|
||||
block_start: InsnIndex,
|
||||
/// Current basic block instructions, in reverse order (because blocks are
|
||||
/// built bottom-to-top).
|
||||
bb_insns: SmallVec<[(I, SourceLoc); 32]>,
|
||||
|
||||
/// Current IR-inst instructions, in forward order.
|
||||
ir_inst_insns: SmallVec<[(I, SourceLoc); 4]>,
|
||||
|
||||
/// Start of succs for the current block in the concatenated succs list.
|
||||
succ_start: usize,
|
||||
|
@ -114,11 +137,12 @@ pub struct VCodeBuilder<I: VCodeInst> {
|
|||
|
||||
impl<I: VCodeInst> VCodeBuilder<I> {
|
||||
/// Create a new VCodeBuilder.
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi, block_order);
|
||||
pub fn new(abi: Box<dyn ABIBody<I = I>>) -> VCodeBuilder<I> {
|
||||
let vcode = VCode::new(abi);
|
||||
VCodeBuilder {
|
||||
vcode,
|
||||
block_start: 0,
|
||||
bb_insns: SmallVec::new(),
|
||||
ir_inst_insns: SmallVec::new(),
|
||||
succ_start: 0,
|
||||
cur_srcloc: SourceLoc::default(),
|
||||
}
|
||||
|
@ -129,11 +153,6 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
&mut *self.vcode.abi
|
||||
}
|
||||
|
||||
/// Access to the BlockLoweringOrder object.
|
||||
pub fn block_order(&self) -> &BlockLoweringOrder {
|
||||
&self.vcode.block_order
|
||||
}
|
||||
|
||||
/// Set the type of a VReg.
|
||||
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
|
||||
while self.vcode.vreg_types.len() <= vreg.get_index() {
|
||||
|
@ -142,17 +161,53 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
self.vcode.vreg_types[vreg.get_index()] = ty;
|
||||
}
|
||||
|
||||
/// Return the underlying bb-to-BlockIndex map.
|
||||
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
|
||||
&self.vcode.block_by_bb
|
||||
}
|
||||
|
||||
/// Initialize the bb-to-BlockIndex map. Returns the first free
|
||||
/// BlockIndex.
|
||||
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
|
||||
let mut bindex: BlockIndex = 0;
|
||||
for bb in blocks.iter() {
|
||||
self.vcode.block_by_bb[*bb] = bindex;
|
||||
self.vcode.bb_by_block.push(*bb);
|
||||
bindex += 1;
|
||||
}
|
||||
bindex
|
||||
}
|
||||
|
||||
/// Get the BlockIndex for an IR block.
|
||||
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
|
||||
self.vcode.block_by_bb[bb]
|
||||
}
|
||||
|
||||
/// Set the current block as the entry block.
|
||||
pub fn set_entry(&mut self, block: BlockIndex) {
|
||||
self.vcode.entry = block;
|
||||
}
|
||||
|
||||
/// End the current IR instruction. Must be called after pushing any
|
||||
/// instructions and prior to ending the basic block.
|
||||
pub fn end_ir_inst(&mut self) {
|
||||
while let Some(pair) = self.ir_inst_insns.pop() {
|
||||
self.bb_insns.push(pair);
|
||||
}
|
||||
}
|
||||
|
||||
/// End the current basic block. Must be called after emitting vcode insts
|
||||
/// for IR insts and prior to ending the function (building the VCode).
|
||||
pub fn end_bb(&mut self) {
|
||||
let start_idx = self.block_start;
|
||||
pub fn end_bb(&mut self) -> BlockIndex {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
let block_num = self.vcode.block_ranges.len() as BlockIndex;
|
||||
// Push the instructions.
|
||||
let start_idx = self.vcode.insts.len() as InsnIndex;
|
||||
while let Some((i, loc)) = self.bb_insns.pop() {
|
||||
self.vcode.insts.push(i);
|
||||
self.vcode.srclocs.push(loc);
|
||||
}
|
||||
let end_idx = self.vcode.insts.len() as InsnIndex;
|
||||
self.block_start = end_idx;
|
||||
// Add the instruction index range to the list of blocks.
|
||||
self.vcode.block_ranges.push((start_idx, end_idx));
|
||||
// End the successors list.
|
||||
|
@ -161,6 +216,8 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
.block_succ_range
|
||||
.push((self.succ_start, succ_end));
|
||||
self.succ_start = succ_end;
|
||||
|
||||
block_num
|
||||
}
|
||||
|
||||
/// Push an instruction for the current BB and current IR inst within the BB.
|
||||
|
@ -168,27 +225,19 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
match insn.is_term() {
|
||||
MachTerminator::None | MachTerminator::Ret => {}
|
||||
MachTerminator::Uncond(target) => {
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
self.vcode.block_succs.push(target);
|
||||
}
|
||||
MachTerminator::Cond(true_branch, false_branch) => {
|
||||
self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
|
||||
self.vcode
|
||||
.block_succs
|
||||
.push(BlockIx::new(false_branch.get()));
|
||||
self.vcode.block_succs.push(true_branch);
|
||||
self.vcode.block_succs.push(false_branch);
|
||||
}
|
||||
MachTerminator::Indirect(targets) => {
|
||||
for target in targets {
|
||||
self.vcode.block_succs.push(BlockIx::new(target.get()));
|
||||
self.vcode.block_succs.push(*target);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.vcode.insts.push(insn);
|
||||
self.vcode.srclocs.push(self.cur_srcloc);
|
||||
}
|
||||
|
||||
/// Get the current source location.
|
||||
pub fn get_srcloc(&self) -> SourceLoc {
|
||||
self.cur_srcloc
|
||||
self.ir_inst_insns.push((insn, self.cur_srcloc));
|
||||
}
|
||||
|
||||
/// Set the current source location.
|
||||
|
@ -198,6 +247,8 @@ impl<I: VCodeInst> VCodeBuilder<I> {
|
|||
|
||||
/// Build the final VCode.
|
||||
pub fn build(self) -> VCode<I> {
|
||||
assert!(self.ir_inst_insns.is_empty());
|
||||
assert!(self.bb_insns.is_empty());
|
||||
self.vcode
|
||||
}
|
||||
}
|
||||
|
@ -219,9 +270,35 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
|
||||
let range = vcode.block_insns(BlockIx::new(block));
|
||||
|
||||
debug!(
|
||||
"is_trivial_jump_block: block {} has len {}",
|
||||
block,
|
||||
range.len()
|
||||
);
|
||||
|
||||
if range.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
let insn = range.first();
|
||||
|
||||
debug!(
|
||||
" -> only insn is: {:?} with terminator {:?}",
|
||||
vcode.get_insn(insn),
|
||||
vcode.get_insn(insn).is_term()
|
||||
);
|
||||
|
||||
match vcode.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(target) => Some(target),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: VCodeInst> VCode<I> {
|
||||
/// New empty VCode.
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
|
||||
fn new(abi: Box<dyn ABIBody<I = I>>) -> VCode<I> {
|
||||
VCode {
|
||||
liveins: abi.liveins(),
|
||||
liveouts: abi.liveouts(),
|
||||
|
@ -232,7 +309,11 @@ impl<I: VCodeInst> VCode<I> {
|
|||
block_ranges: vec![],
|
||||
block_succ_range: vec![],
|
||||
block_succs: vec![],
|
||||
block_order,
|
||||
block_by_bb: SecondaryMap::with_default(0),
|
||||
bb_by_block: vec![],
|
||||
final_block_order: vec![],
|
||||
final_block_offsets: vec![],
|
||||
code_size: 0,
|
||||
abi,
|
||||
}
|
||||
}
|
||||
|
@ -264,7 +345,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
/// Get the successors for a block.
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
|
||||
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
|
||||
let (start, end) = self.block_succ_range[block as usize];
|
||||
&self.block_succs[start..end]
|
||||
}
|
||||
|
@ -273,6 +354,8 @@ impl<I: VCodeInst> VCode<I> {
|
|||
/// instructions including spliced fill/reload/move instructions, and replace
|
||||
/// the VCode with them.
|
||||
pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
|
||||
self.final_block_order = compute_final_block_order(self);
|
||||
|
||||
// Record the spillslot count and clobbered registers for the ABI/stack
|
||||
// setup code.
|
||||
self.abi.set_num_spillslots(result.num_spill_slots as usize);
|
||||
|
@ -287,12 +370,11 @@ impl<I: VCodeInst> VCode<I> {
|
|||
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
|
||||
let mut final_srclocs = vec![];
|
||||
|
||||
for block in 0..self.num_blocks() {
|
||||
let block = block as BlockIndex;
|
||||
let (start, end) = block_ranges[block as usize];
|
||||
for block in &self.final_block_order {
|
||||
let (start, end) = block_ranges[*block as usize];
|
||||
let final_start = final_insns.len() as InsnIndex;
|
||||
|
||||
if block == self.entry {
|
||||
if *block == self.entry {
|
||||
// Start with the prologue.
|
||||
let prologue = self.abi.gen_prologue();
|
||||
let len = prologue.len();
|
||||
|
@ -334,7 +416,7 @@ impl<I: VCodeInst> VCode<I> {
|
|||
}
|
||||
|
||||
let final_end = final_insns.len() as InsnIndex;
|
||||
final_block_ranges[block as usize] = (final_start, final_end);
|
||||
final_block_ranges[*block as usize] = (final_start, final_end);
|
||||
}
|
||||
|
||||
debug_assert!(final_insns.len() == final_srclocs.len());
|
||||
|
@ -344,68 +426,174 @@ impl<I: VCodeInst> VCode<I> {
|
|||
self.block_ranges = final_block_ranges;
|
||||
}
|
||||
|
||||
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
|
||||
/// reloc/trap/etc. records ready for use.
|
||||
pub fn emit(&self) -> MachBuffer<I>
|
||||
where
|
||||
I: MachInstEmit,
|
||||
{
|
||||
let mut buffer = MachBuffer::new();
|
||||
let mut state = Default::default();
|
||||
/// Removes redundant branches, rewriting targets to point directly to the
|
||||
/// ultimate block at the end of a chain of trivial one-target jumps.
|
||||
pub fn remove_redundant_branches(&mut self) {
|
||||
// For each block, compute the actual target block, looking through up to one
|
||||
// block with single-target jumps (this will remove empty edge blocks inserted
|
||||
// by phi-lowering).
|
||||
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
|
||||
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
|
||||
.collect();
|
||||
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
|
||||
|
||||
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
|
||||
debug!(
|
||||
"remove_redundant_branches: block_rewrites = {:?}",
|
||||
block_rewrites
|
||||
);
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = None;
|
||||
for block in 0..self.num_blocks() {
|
||||
let block = block as BlockIndex;
|
||||
let new_offset = I::align_basic_block(buffer.cur_offset());
|
||||
while new_offset > buffer.cur_offset() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
|
||||
nop.emit(&mut buffer, flags, &mut Default::default());
|
||||
}
|
||||
assert_eq!(buffer.cur_offset(), new_offset);
|
||||
refcounts[self.entry as usize] = 1;
|
||||
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
buffer.bind_label(MachLabel::from_block(block));
|
||||
for iix in start..end {
|
||||
let srcloc = self.srclocs[iix as usize];
|
||||
if cur_srcloc != Some(srcloc) {
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
for block in 0..self.num_blocks() as u32 {
|
||||
for insn in self.block_insns(BlockIx::new(block)) {
|
||||
self.get_insn_mut(insn)
|
||||
.with_block_rewrites(&block_rewrites[..]);
|
||||
match self.get_insn(insn).is_term() {
|
||||
MachTerminator::Uncond(bix) => {
|
||||
refcounts[bix as usize] += 1;
|
||||
}
|
||||
buffer.start_srcloc(srcloc);
|
||||
cur_srcloc = Some(srcloc);
|
||||
}
|
||||
|
||||
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
|
||||
}
|
||||
|
||||
if cur_srcloc.is_some() {
|
||||
buffer.end_srcloc();
|
||||
cur_srcloc = None;
|
||||
}
|
||||
|
||||
// Do we need an island? Get the worst-case size of the next BB and see if, having
|
||||
// emitted that many bytes, we will be beyond the deadline.
|
||||
if block < (self.num_blocks() - 1) as BlockIndex {
|
||||
let next_block = block + 1;
|
||||
let next_block_range = self.block_ranges[next_block as usize];
|
||||
let next_block_size = next_block_range.1 - next_block_range.0;
|
||||
let worst_case_next_bb = I::worst_case_size() * next_block_size;
|
||||
if buffer.island_needed(worst_case_next_bb) {
|
||||
buffer.emit_island();
|
||||
MachTerminator::Cond(bix1, bix2) => {
|
||||
refcounts[bix1 as usize] += 1;
|
||||
refcounts[bix2 as usize] += 1;
|
||||
}
|
||||
MachTerminator::Indirect(blocks) => {
|
||||
for block in blocks {
|
||||
refcounts[*block as usize] += 1;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buffer
|
||||
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
|
||||
|
||||
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
|
||||
self.final_block_order = block_order
|
||||
.into_iter()
|
||||
.filter(|b| !deleted[*b as usize])
|
||||
.collect();
|
||||
|
||||
// Rewrite successor information based on the block-rewrite map.
|
||||
for succ in &mut self.block_succs {
|
||||
let new_succ = block_rewrites[*succ as usize];
|
||||
*succ = new_succ;
|
||||
}
|
||||
}
|
||||
|
||||
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
|
||||
/// depending on fallthrough; and (ii) use concrete offsets.
|
||||
pub fn finalize_branches(&mut self)
|
||||
where
|
||||
I: MachInstEmit<MachSectionSize>,
|
||||
{
|
||||
// Compute fallthrough block, indexed by block.
|
||||
let num_final_blocks = self.final_block_order.len();
|
||||
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
|
||||
for i in 0..(num_final_blocks - 1) {
|
||||
let from = self.final_block_order[i];
|
||||
let to = self.final_block_order[i + 1];
|
||||
block_fallthrough[from as usize] = Some(to);
|
||||
}
|
||||
|
||||
// Pass over VCode instructions and finalize two-way branches into
|
||||
// one-way branches with fallthrough.
|
||||
for block in 0..self.num_blocks() {
|
||||
let next_block = block_fallthrough[block];
|
||||
let (start, end) = self.block_ranges[block];
|
||||
|
||||
for iix in start..end {
|
||||
let insn = &mut self.insts[iix as usize];
|
||||
insn.with_fallthrough_block(next_block);
|
||||
}
|
||||
}
|
||||
|
||||
let flags = self.abi.flags();
|
||||
|
||||
// Compute block offsets.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
let mut block_offsets = vec![0; self.num_blocks()];
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
block_offsets[block as usize] = code_section.offset;
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
|
||||
// We now have the section layout.
|
||||
self.final_block_offsets = block_offsets;
|
||||
self.code_size = code_section.size();
|
||||
|
||||
// Update branches with known block offsets. This looks like the
|
||||
// traversal above, but (i) does not update block_offsets, rather uses
|
||||
// it (so forward references are now possible), and (ii) mutates the
|
||||
// instructions.
|
||||
let mut code_section = MachSectionSize::new(0);
|
||||
for &block in &self.final_block_order {
|
||||
code_section.offset = I::align_basic_block(code_section.offset);
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
self.insts[iix as usize]
|
||||
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
|
||||
self.insts[iix as usize].emit(&mut code_section, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the instructions to a list of sections.
|
||||
pub fn emit(&self) -> MachSections
|
||||
where
|
||||
I: MachInstEmit<MachSection>,
|
||||
{
|
||||
let mut sections = MachSections::new();
|
||||
let code_idx = sections.add_section(0, self.code_size);
|
||||
let code_section = sections.get_section(code_idx);
|
||||
|
||||
let flags = self.abi.flags();
|
||||
let mut cur_srcloc = SourceLoc::default();
|
||||
for &block in &self.final_block_order {
|
||||
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
|
||||
while new_offset > code_section.cur_offset_from_start() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
|
||||
nop.emit(code_section, flags);
|
||||
}
|
||||
assert_eq!(code_section.cur_offset_from_start(), new_offset);
|
||||
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
for iix in start..end {
|
||||
let srcloc = self.srclocs[iix as usize];
|
||||
if srcloc != cur_srcloc {
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
}
|
||||
if !srcloc.is_default() {
|
||||
code_section.start_srcloc(srcloc);
|
||||
}
|
||||
cur_srcloc = srcloc;
|
||||
}
|
||||
|
||||
self.insts[iix as usize].emit(code_section, flags);
|
||||
}
|
||||
|
||||
if !cur_srcloc.is_default() {
|
||||
code_section.end_srcloc();
|
||||
cur_srcloc = SourceLoc::default();
|
||||
}
|
||||
}
|
||||
|
||||
sections
|
||||
}
|
||||
|
||||
/// Get the IR block for a BlockIndex, if one exists.
|
||||
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
|
||||
self.block_order.lowered_order()[block as usize].orig_block()
|
||||
if (block as usize) < self.bb_by_block.len() {
|
||||
Some(self.bb_by_block[block as usize])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -441,9 +629,13 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
Range::new(InstIx::new(start), (end - start) as usize)
|
||||
}
|
||||
|
||||
fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
|
||||
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
|
||||
let (start, end) = self.block_succ_range[block.get() as usize];
|
||||
Cow::Borrowed(&self.block_succs[start..end])
|
||||
self.block_succs[start..end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(BlockIx::new)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn is_ret(&self, insn: InstIx) -> bool {
|
||||
|
@ -457,7 +649,7 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
|||
insn.get_regs(collector)
|
||||
}
|
||||
|
||||
fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
|
||||
fn map_regs(insn: &mut I, mapper: &RegUsageMapper) {
|
||||
insn.map_regs(mapper);
|
||||
}
|
||||
|
||||
|
@ -510,11 +702,12 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
|
|||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
writeln!(f, "VCode_Debug {{")?;
|
||||
writeln!(f, " Entry block: {}", self.entry)?;
|
||||
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
|
||||
|
||||
for block in 0..self.num_blocks() {
|
||||
writeln!(f, "Block {}:", block,)?;
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
writeln!(f, " (successor: Block {})", succ.get())?;
|
||||
writeln!(f, " (successor: Block {})", succ)?;
|
||||
}
|
||||
let (start, end) = self.block_ranges[block];
|
||||
writeln!(f, " (instruction range: {} .. {})", start, end)?;
|
||||
|
@ -533,21 +726,52 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
|
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
use std::fmt::Write;
|
||||
|
||||
// Calculate an order in which to display the blocks. This is the same
|
||||
// as final_block_order, but also includes blocks which are in the
|
||||
// representation but not in final_block_order.
|
||||
let mut display_order = Vec::<usize>::new();
|
||||
// First display blocks in `final_block_order`
|
||||
for bix in &self.final_block_order {
|
||||
assert!((*bix as usize) < self.num_blocks());
|
||||
display_order.push(*bix as usize);
|
||||
}
|
||||
// Now also take care of those not listed in `final_block_order`.
|
||||
// This is quadratic, but it's also debug-only code.
|
||||
for bix in 0..self.num_blocks() {
|
||||
if display_order.contains(&bix) {
|
||||
continue;
|
||||
}
|
||||
display_order.push(bix);
|
||||
}
|
||||
|
||||
let mut s = String::new();
|
||||
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
|
||||
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
|
||||
write!(
|
||||
&mut s,
|
||||
" Final block order: {:?}\n",
|
||||
self.final_block_order
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for i in 0..self.num_blocks() {
|
||||
let block = i as BlockIndex;
|
||||
let block = display_order[i];
|
||||
|
||||
write!(&mut s, "Block {}:\n", block).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block) {
|
||||
let omitted = if !self.final_block_order.is_empty() && i >= self.final_block_order.len()
|
||||
{
|
||||
"** OMITTED **"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
write!(&mut s, "Block {}: {}\n", block, omitted).unwrap();
|
||||
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
|
||||
write!(&mut s, " (original IR block: {})\n", bb).unwrap();
|
||||
}
|
||||
for succ in self.succs(block) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
|
||||
for succ in self.succs(block as BlockIndex) {
|
||||
write!(&mut s, " (successor: Block {})\n", succ).unwrap();
|
||||
}
|
||||
let (start, end) = self.block_ranges[block as usize];
|
||||
let (start, end) = self.block_ranges[block];
|
||||
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
|
||||
for inst in start..end {
|
||||
write!(
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
//! A pass that computes the number of uses of any given instruction.
|
||||
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir::dfg::ValueDef;
|
||||
use crate::ir::Value;
|
||||
use crate::ir::{DataFlowGraph, Function, Inst};
|
||||
|
||||
/// Auxiliary data structure that counts the number of uses of any given
|
||||
/// instruction in a Function. This is used during instruction selection
|
||||
/// to essentially do incremental DCE: when an instruction is no longer
|
||||
/// needed because its computation has been isel'd into another machine
|
||||
/// instruction at every use site, we can skip it.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NumUses {
|
||||
uses: SecondaryMap<Inst, u32>,
|
||||
}
|
||||
|
||||
impl NumUses {
|
||||
fn new() -> NumUses {
|
||||
NumUses {
|
||||
uses: SecondaryMap::with_default(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the NumUses analysis result for a function.
|
||||
pub fn compute(func: &Function) -> NumUses {
|
||||
let mut uses = NumUses::new();
|
||||
for bb in func.layout.blocks() {
|
||||
for inst in func.layout.block_insts(bb) {
|
||||
for arg in func.dfg.inst_args(inst) {
|
||||
let v = func.dfg.resolve_aliases(*arg);
|
||||
uses.add_value(&func.dfg, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
uses
|
||||
}
|
||||
|
||||
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
|
||||
match dfg.value_def(v) {
|
||||
ValueDef::Result(inst, _) => {
|
||||
self.uses[inst] += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Take the complete uses map, consuming this analysis result.
|
||||
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
|
||||
self.uses
|
||||
}
|
||||
}
|
|
@ -1,887 +0,0 @@
|
|||
//! Glue for working with `peepmatic`-generated peephole optimizers.
|
||||
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{
|
||||
dfg::DataFlowGraph,
|
||||
entities::{Inst, Value},
|
||||
immediates::{Imm64, Uimm64},
|
||||
instructions::{InstructionData, Opcode},
|
||||
types, InstBuilder,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
use cranelift_codegen_shared::condcodes::IntCC;
|
||||
use peepmatic_runtime::{
|
||||
cc::ConditionCode,
|
||||
instruction_set::InstructionSet,
|
||||
operator::Operator,
|
||||
part::{Constant, Part},
|
||||
paths::Path,
|
||||
r#type::{BitWidth, Kind, Type},
|
||||
PeepholeOptimizations, PeepholeOptimizer,
|
||||
};
|
||||
use std::boxed::Box;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::ptr;
|
||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||
|
||||
/// Get the `preopt.peepmatic` peephole optimizer.
|
||||
pub(crate) fn preopt<'a, 'b>(
|
||||
isa: &'b dyn TargetIsa,
|
||||
) -> PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa> {
|
||||
static SERIALIZED: &[u8] = include_bytes!("preopt.serialized");
|
||||
|
||||
// Once initialized, this must never be re-assigned. The initialized value
|
||||
// is semantically "static data" and is intentionally leaked for the whole
|
||||
// program's lifetime.
|
||||
static DESERIALIZED: AtomicPtr<PeepholeOptimizations> = AtomicPtr::new(ptr::null_mut());
|
||||
|
||||
// If `DESERIALIZED` has already been initialized, then just use it.
|
||||
let ptr = DESERIALIZED.load(Ordering::SeqCst);
|
||||
if let Some(peep_opts) = unsafe { ptr.as_ref() } {
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// Otherwise, if `DESERIALIZED` hasn't been initialized, then we need to
|
||||
// deserialize the peephole optimizations and initialize it. However,
|
||||
// another thread could be doing the same thing concurrently, so there is a
|
||||
// race to see who initializes `DESERIALIZED` first, and we need to be
|
||||
// prepared to both win or lose that race.
|
||||
let peep_opts = PeepholeOptimizations::deserialize(SERIALIZED)
|
||||
.expect("should always be able to deserialize `preopt.serialized`");
|
||||
let peep_opts = Box::into_raw(Box::new(peep_opts));
|
||||
|
||||
// Only update `DESERIALIZE` if it is still null, attempting to perform the
|
||||
// one-time transition from null -> non-null.
|
||||
if DESERIALIZED
|
||||
.compare_and_swap(ptr::null_mut(), peep_opts, Ordering::SeqCst)
|
||||
.is_null()
|
||||
{
|
||||
// We won the race to initialize `DESERIALIZED`.
|
||||
debug_assert_eq!(DESERIALIZED.load(Ordering::SeqCst), peep_opts);
|
||||
let peep_opts = unsafe { &*peep_opts };
|
||||
return peep_opts.optimizer(isa);
|
||||
}
|
||||
|
||||
// We lost the race to initialize `DESERIALIZED`. Drop our no-longer-needed
|
||||
// instance of `peep_opts` and get the pointer to the instance that won the
|
||||
// race.
|
||||
let _ = unsafe { Box::from_raw(peep_opts) };
|
||||
let peep_opts = DESERIALIZED.load(Ordering::SeqCst);
|
||||
let peep_opts = unsafe { peep_opts.as_ref().unwrap() };
|
||||
peep_opts.optimizer(isa)
|
||||
}
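// A self-contained sketch (hypothetical, not part of the original file) of the same
// "race to initialize" pattern that `preopt` uses above, applied to a plain `u64`
// instead of `PeepholeOptimizations`. It reuses the `ptr`, `Box`, and
// `AtomicPtr`/`Ordering` imports already at the top of this file, and uses
// `compare_exchange` rather than `compare_and_swap`; the winning allocation is
// intentionally leaked so the returned reference can be `'static`. The name
// `_get_cached_sketch` is an assumption made for this example only.
fn _get_cached_sketch() -> &'static u64 {
    static CACHED: AtomicPtr<u64> = AtomicPtr::new(ptr::null_mut());

    // Fast path: another call (possibly on another thread) already initialized it.
    if let Some(existing) = unsafe { CACHED.load(Ordering::SeqCst).as_ref() } {
        return existing;
    }

    // Slow path: build our own candidate and race to publish it.
    let fresh = Box::into_raw(Box::new(42u64)); // stand-in for an expensive one-time computation
    match CACHED.compare_exchange(ptr::null_mut(), fresh, Ordering::SeqCst, Ordering::SeqCst) {
        // We won the race: our allocation is now the shared, leaked value.
        Ok(_) => unsafe { &*fresh },
        // We lost: free our candidate and use the winner's instead.
        Err(winner) => {
            drop(unsafe { Box::from_raw(fresh) });
            unsafe { &*winner }
        }
    }
}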
|
||||
|
||||
/// Either a `Value` or an `Inst`.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum ValueOrInst {
|
||||
Value(Value),
|
||||
Inst(Inst),
|
||||
}
|
||||
|
||||
impl ValueOrInst {
|
||||
/// Get the underlying `Value` if any.
|
||||
pub fn value(&self) -> Option<Value> {
|
||||
match *self {
|
||||
Self::Value(v) => Some(v),
|
||||
Self::Inst(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the underlying `Inst` if any.
|
||||
pub fn inst(&self) -> Option<Inst> {
|
||||
match *self {
|
||||
Self::Inst(i) => Some(i),
|
||||
Self::Value(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Value`, panicking if it is not a `Value`.
|
||||
pub fn unwrap_value(&self) -> Value {
|
||||
self.value().unwrap()
|
||||
}
|
||||
|
||||
/// Unwrap the underlying `Inst`, panicking if it is not an `Inst`.
|
||||
pub fn unwrap_inst(&self) -> Inst {
|
||||
self.inst().unwrap()
|
||||
}
|
||||
|
||||
/// Is this a `Value`?
|
||||
pub fn is_value(&self) -> bool {
|
||||
self.value().is_some()
|
||||
}
|
||||
|
||||
/// Is this an `Inst`?
|
||||
pub fn is_inst(&self) -> bool {
|
||||
self.inst().is_some()
|
||||
}
|
||||
|
||||
fn resolve_inst(&self, dfg: &DataFlowGraph) -> Option<Inst> {
|
||||
match *self {
|
||||
ValueOrInst::Inst(i) => Some(i),
|
||||
ValueOrInst::Value(v) => dfg.value_def(v).inst(),
|
||||
}
|
||||
}
|
||||
|
||||
fn result_bit_width(&self, dfg: &DataFlowGraph) -> u8 {
|
||||
match *self {
|
||||
ValueOrInst::Value(v) => dfg.value_type(v).bits().try_into().unwrap(),
|
||||
ValueOrInst::Inst(inst) => {
|
||||
let result = dfg.first_result(inst);
|
||||
dfg.value_type(result).bits().try_into().unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_constant(&self, pos: &mut FuncCursor) -> Option<Constant> {
|
||||
let inst = self.resolve_inst(&pos.func.dfg)?;
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
let x: i64 = imm.into();
|
||||
Some(Constant::Int(x as u64, width))
|
||||
}
|
||||
InstructionData::UnaryBool {
|
||||
opcode: Opcode::Bconst,
|
||||
imm,
|
||||
} => {
|
||||
let width = self.result_bit_width(&pos.func.dfg).try_into().unwrap();
|
||||
Some(Constant::Bool(imm, width))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Value> for ValueOrInst {
|
||||
fn from(v: Value) -> ValueOrInst {
|
||||
ValueOrInst::Value(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Inst> for ValueOrInst {
|
||||
fn from(i: Inst) -> ValueOrInst {
|
||||
ValueOrInst::Inst(i)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the fixed bit width of `bit_width`, or if it is polymorphic, the bit
|
||||
/// width of `root`.
|
||||
fn bit_width(dfg: &DataFlowGraph, bit_width: BitWidth, root: Inst) -> u8 {
|
||||
bit_width.fixed_width().unwrap_or_else(|| {
|
||||
let tyvar = dfg.ctrl_typevar(root);
|
||||
let ty = dfg.compute_result_type(root, 0, tyvar).unwrap();
|
||||
u8::try_from(ty.bits()).unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert the constant `c` into an instruction.
|
||||
fn const_to_value<'a>(builder: impl InstBuilder<'a>, c: Constant, root: Inst) -> Value {
|
||||
match c {
|
||||
Constant::Bool(b, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
1 => types::B1,
|
||||
8 => types::B8,
|
||||
16 => types::B16,
|
||||
32 => types::B32,
|
||||
64 => types::B64,
|
||||
128 => types::B128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.bconst(ty, b)
|
||||
}
|
||||
Constant::Int(x, width) => {
|
||||
let width = bit_width(builder.data_flow_graph(), width, root);
|
||||
let ty = match width {
|
||||
8 => types::I8,
|
||||
16 => types::I16,
|
||||
32 => types::I32,
|
||||
64 => types::I64,
|
||||
128 => types::I128,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
builder.iconst(ty, x as i64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_value(pos: &mut FuncCursor, root: Inst, part: Part<ValueOrInst>) -> Option<Value> {
|
||||
match part {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => {
|
||||
pos.func.dfg.inst_results(inst).first().copied()
|
||||
}
|
||||
Part::Instruction(ValueOrInst::Value(v)) => Some(v),
|
||||
Part::Constant(c) => Some(const_to_value(pos.ins(), c, root)),
|
||||
Part::ConditionCode(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
fn to_peepmatic_operator(&self) -> Option<Operator> {
|
||||
macro_rules! convert {
|
||||
( $( $op:ident $(,)* )* ) => {
|
||||
match self {
|
||||
$( Self::$op => Some(Operator::$op), )*
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
convert!(
|
||||
AdjustSpDown,
|
||||
AdjustSpDownImm,
|
||||
Band,
|
||||
BandImm,
|
||||
Bconst,
|
||||
Bint,
|
||||
Bor,
|
||||
BorImm,
|
||||
Brnz,
|
||||
Brz,
|
||||
Bxor,
|
||||
BxorImm,
|
||||
Iadd,
|
||||
IaddImm,
|
||||
Icmp,
|
||||
IcmpImm,
|
||||
Iconst,
|
||||
Ifcmp,
|
||||
IfcmpImm,
|
||||
Imul,
|
||||
ImulImm,
|
||||
Ireduce,
|
||||
IrsubImm,
|
||||
Ishl,
|
||||
IshlImm,
|
||||
Isub,
|
||||
Rotl,
|
||||
RotlImm,
|
||||
Rotr,
|
||||
RotrImm,
|
||||
Sdiv,
|
||||
SdivImm,
|
||||
Select,
|
||||
Sextend,
|
||||
Srem,
|
||||
SremImm,
|
||||
Sshr,
|
||||
SshrImm,
|
||||
Trapnz,
|
||||
Trapz,
|
||||
Udiv,
|
||||
UdivImm,
|
||||
Uextend,
|
||||
Urem,
|
||||
UremImm,
|
||||
Ushr,
|
||||
UshrImm,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Constant> for Imm64 {
|
||||
type Error = &'static str;
|
||||
|
||||
fn try_from(c: Constant) -> Result<Self, Self::Error> {
|
||||
match c {
|
||||
Constant::Int(x, _) => Ok(Imm64::from(x as i64)),
|
||||
Constant::Bool(..) => Err("cannot create Imm64 from Constant::Bool"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Constant> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: i64 = self.into();
|
||||
Constant::Int(x as _, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Imm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn part_to_imm64(pos: &mut FuncCursor, part: Part<ValueOrInst>) -> Imm64 {
|
||||
return match part {
|
||||
Part::Instruction(x) => match x.to_constant(pos).unwrap_or_else(|| cannot_convert()) {
|
||||
Constant::Int(x, _) => (x as i64).into(),
|
||||
Constant::Bool(..) => cannot_convert(),
|
||||
},
|
||||
Part::Constant(Constant::Int(x, _)) => (x as i64).into(),
|
||||
Part::ConditionCode(_) | Part::Constant(Constant::Bool(..)) => cannot_convert(),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn cannot_convert() -> ! {
|
||||
panic!("cannot convert part into `Imm64`")
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Constant> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Constant {
|
||||
let x: u64 = self.into();
|
||||
Constant::Int(x, BitWidth::SixtyFour)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Part<ValueOrInst>> for Uimm64 {
|
||||
#[inline]
|
||||
fn into(self) -> Part<ValueOrInst> {
|
||||
let c: Constant = self.into();
|
||||
c.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn peepmatic_to_intcc(cc: ConditionCode) -> IntCC {
|
||||
match cc {
|
||||
ConditionCode::Eq => IntCC::Equal,
|
||||
ConditionCode::Ne => IntCC::NotEqual,
|
||||
ConditionCode::Slt => IntCC::SignedLessThan,
|
||||
ConditionCode::Sle => IntCC::SignedLessThanOrEqual,
|
||||
ConditionCode::Sgt => IntCC::SignedGreaterThan,
|
||||
ConditionCode::Sge => IntCC::SignedGreaterThanOrEqual,
|
||||
ConditionCode::Ult => IntCC::UnsignedLessThan,
|
||||
ConditionCode::Uge => IntCC::UnsignedGreaterThanOrEqual,
|
||||
ConditionCode::Ugt => IntCC::UnsignedGreaterThan,
|
||||
ConditionCode::Ule => IntCC::UnsignedLessThanOrEqual,
|
||||
ConditionCode::Of => IntCC::Overflow,
|
||||
ConditionCode::Nof => IntCC::NotOverflow,
|
||||
}
|
||||
}
|
||||
|
||||
fn intcc_to_peepmatic(cc: IntCC) -> ConditionCode {
|
||||
match cc {
|
||||
IntCC::Equal => ConditionCode::Eq,
|
||||
IntCC::NotEqual => ConditionCode::Ne,
|
||||
IntCC::SignedLessThan => ConditionCode::Slt,
|
||||
IntCC::SignedGreaterThanOrEqual => ConditionCode::Sge,
|
||||
IntCC::SignedGreaterThan => ConditionCode::Sgt,
|
||||
IntCC::SignedLessThanOrEqual => ConditionCode::Sle,
|
||||
IntCC::UnsignedLessThan => ConditionCode::Ult,
|
||||
IntCC::UnsignedGreaterThanOrEqual => ConditionCode::Uge,
|
||||
IntCC::UnsignedGreaterThan => ConditionCode::Ugt,
|
||||
IntCC::UnsignedLessThanOrEqual => ConditionCode::Ule,
|
||||
IntCC::Overflow => ConditionCode::Of,
|
||||
IntCC::NotOverflow => ConditionCode::Nof,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_immediate(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Part<ValueOrInst> {
|
||||
return match dfg[inst] {
|
||||
InstructionData::BinaryImm64 { imm, .. } if i == 0 => imm.into(),
|
||||
InstructionData::BranchIcmp { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::BranchInt { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompare { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCompareImm { imm, .. } if i == 1 => imm.into(),
|
||||
InstructionData::IntCond { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntCondTrap { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::IntSelect { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
|
||||
InstructionData::UnaryBool { imm, .. } if i == 0 => {
|
||||
Constant::Bool(imm, BitWidth::Polymorphic).into()
|
||||
}
|
||||
InstructionData::UnaryImm { imm, .. } if i == 0 => imm.into(),
|
||||
ref otherwise => unsupported(otherwise),
|
||||
};
|
||||
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
fn unsupported(data: &InstructionData) -> ! {
|
||||
panic!("unsupported instruction data: {:?}", data)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_argument(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Option<Value> {
|
||||
dfg.inst_args(inst).get(i).copied()
|
||||
}
|
||||
|
||||
fn peepmatic_ty_to_ir_ty(ty: Type, dfg: &DataFlowGraph, root: Inst) -> types::Type {
|
||||
match (ty.kind, bit_width(dfg, ty.bit_width, root)) {
|
||||
(Kind::Int, 8) => types::I8,
|
||||
(Kind::Int, 16) => types::I16,
|
||||
(Kind::Int, 32) => types::I32,
|
||||
(Kind::Int, 64) => types::I64,
|
||||
(Kind::Int, 128) => types::I128,
|
||||
(Kind::Bool, 1) => types::B1,
|
||||
(Kind::Bool, 8) => types::B8,
|
||||
(Kind::Bool, 16) => types::B16,
|
||||
(Kind::Bool, 32) => types::B32,
|
||||
(Kind::Bool, 64) => types::B64,
|
||||
(Kind::Bool, 128) => types::B128,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// NB: the unsafe contract we must uphold here is that our implementation of
|
||||
// `instruction_result_bit_width` must always return a valid, non-zero bit
|
||||
// width.
|
||||
unsafe impl<'a, 'b> InstructionSet<'b> for &'a dyn TargetIsa {
|
||||
type Context = FuncCursor<'b>;
|
||||
|
||||
type Instruction = ValueOrInst;
|
||||
|
||||
fn replace_instruction(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
old: ValueOrInst,
|
||||
new: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("replace {:?} with {:?}", old, new);
|
||||
let old_inst = old.resolve_inst(&pos.func.dfg).unwrap();
|
||||
|
||||
// Try to convert `new` to an instruction, because we prefer replacing
|
||||
// an old instruction with a new one wholesale. However, if the
|
||||
// replacement cannot be converted to an instruction (e.g. the
|
||||
// right-hand side is a block/function parameter value) then we change
|
||||
// the old instruction's result to an alias of the new value.
|
||||
let new_inst = match new {
|
||||
Part::Instruction(ValueOrInst::Inst(inst)) => Some(inst),
|
||||
Part::Instruction(ValueOrInst::Value(_)) => {
|
||||
// Do not try and follow the value definition. If we transplant
|
||||
// this value's instruction, and there are other uses of this
|
||||
// value, then we could mess up ordering between instructions.
|
||||
None
|
||||
}
|
||||
Part::Constant(c) => {
|
||||
let v = const_to_value(pos.ins(), c, old_inst);
|
||||
let inst = pos.func.dfg.value_def(v).unwrap_inst();
|
||||
Some(inst)
|
||||
}
|
||||
Part::ConditionCode(_) => None,
|
||||
};
|
||||
|
||||
match new_inst {
|
||||
Some(new_inst) => {
|
||||
pos.func.transplant_inst(old_inst, new_inst);
|
||||
debug_assert_eq!(pos.current_inst(), Some(old_inst));
|
||||
old_inst.into()
|
||||
}
|
||||
None => {
|
||||
let new_value = part_to_value(pos, old_inst, new).unwrap();
|
||||
|
||||
let old_results = pos.func.dfg.detach_results(old_inst);
|
||||
let old_results = old_results.as_slice(&pos.func.dfg.value_lists);
|
||||
assert_eq!(old_results.len(), 1);
|
||||
let old_value = old_results[0];
|
||||
|
||||
pos.func.dfg.change_to_alias(old_value, new_value);
|
||||
pos.func.dfg.replace(old_inst).nop();
|
||||
|
||||
new_value.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_part_at_path(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
path: Path,
|
||||
) -> Option<Part<ValueOrInst>> {
|
||||
// The root is path [0].
|
||||
debug_assert!(!path.0.is_empty());
|
||||
debug_assert_eq!(path.0[0], 0);
|
||||
|
||||
let mut part = Part::Instruction(root);
|
||||
for p in path.0[1..].iter().copied() {
|
||||
let inst = part.as_instruction()?.resolve_inst(&pos.func.dfg)?;
|
||||
let operator = pos.func.dfg[inst].opcode().to_peepmatic_operator()?;
|
||||
|
||||
if p < operator.immediates_arity() {
|
||||
part = get_immediate(&pos.func.dfg, inst, p as usize);
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg = p - operator.immediates_arity();
|
||||
let arg = arg as usize;
|
||||
let value = get_argument(&pos.func.dfg, inst, arg)?;
|
||||
part = Part::Instruction(value.into());
|
||||
}
|
||||
|
||||
log::trace!("get_part_at_path({:?}) = {:?}", path, part);
|
||||
Some(part)
|
||||
}
|
||||
|
||||
fn operator(&self, pos: &mut FuncCursor<'b>, value_or_inst: ValueOrInst) -> Option<Operator> {
|
||||
let inst = value_or_inst.resolve_inst(&pos.func.dfg)?;
|
||||
pos.func.dfg[inst].opcode().to_peepmatic_operator()
|
||||
}
|
||||
|
||||
fn make_inst_1(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
r#type: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_1: {:?}({:?})", operator, a);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::AdjustSpDown => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
pos.ins().adjust_sp_down(a).into()
|
||||
}
|
||||
Operator::AdjustSpDownImm => {
|
||||
let c = a.unwrap_constant();
|
||||
let imm = Imm64::try_from(c).unwrap();
|
||||
pos.ins().adjust_sp_down_imm(imm).into()
|
||||
}
|
||||
Operator::Bconst => {
|
||||
let c = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), c, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bint => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().bint(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Brnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: branching instructions must be the root of an
|
||||
// optimization's right-hand side, so we get the destination
|
||||
// block and arguments from the left-hand side's root. Peepmatic
|
||||
// doesn't currently represent labels or varargs.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brnz(a, block, &args).into()
|
||||
}
|
||||
Operator::Brz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// See the comment in the `Operator::Brnz` match arm.
|
||||
let block = pos.func.dfg[root].branch_destination().unwrap();
|
||||
let args = pos.func.dfg.inst_args(root)[1..].to_vec();
|
||||
|
||||
pos.ins().brz(a, block, &args).into()
|
||||
}
|
||||
Operator::Iconst => {
|
||||
let a = a.unwrap_constant();
|
||||
let val = const_to_value(pos.ins(), a, root);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ireduce => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().ireduce(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().sextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Trapnz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
|
||||
// NB: similar to branching instructions (see comment in the
|
||||
// `Operator::Brnz` match arm) trapping instructions must be the
|
||||
// root of an optimization's right-hand side, and we get the
|
||||
// trap code from the root of the left-hand side. Peepmatic
|
||||
// doesn't currently represent trap codes.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
|
||||
pos.ins().trapnz(a, code).into()
|
||||
}
|
||||
Operator::Trapz => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
// See comment in the `Operator::Trapnz` match arm.
|
||||
let code = pos.func.dfg[root].trap_code().unwrap();
|
||||
pos.ins().trapz(a, code).into()
|
||||
}
|
||||
Operator::Uextend => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let ty = peepmatic_ty_to_ir_ty(r#type, &pos.func.dfg, root);
|
||||
let val = pos.ins().uextend(ty, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_2(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_2: {:?}({:?}, {:?})", operator, a, b);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Band => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BandImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().band_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Bxor => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::BxorImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().bxor_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Iadd => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IaddImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().iadd_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ifcmp => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IfcmpImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ifcmp_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Imul => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::ImulImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().imul_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IrsubImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().irsub_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ishl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IshlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ishl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Isub => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().isub(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotl => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotlImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotl_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Rotr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::RotrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().rotr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sdiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sdiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Srem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().srem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Sshr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::SshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().sshr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Udiv => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UdivImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().udiv_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Urem => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UremImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().urem_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Ushr => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr(a, b);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::UshrImm => {
|
||||
let a = part_to_imm64(pos, a);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let val = pos.ins().ushr_imm(b, a);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_inst_3(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
root: ValueOrInst,
|
||||
operator: Operator,
|
||||
_: Type,
|
||||
a: Part<ValueOrInst>,
|
||||
b: Part<ValueOrInst>,
|
||||
c: Part<ValueOrInst>,
|
||||
) -> ValueOrInst {
|
||||
log::trace!("make_inst_3: {:?}({:?}, {:?}, {:?})", operator, a, b, c);
|
||||
|
||||
let root = root.resolve_inst(&pos.func.dfg).unwrap();
|
||||
match operator {
|
||||
Operator::Icmp => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp(cond, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::IcmpImm => {
|
||||
let cond = a.unwrap_condition_code();
|
||||
let cond = peepmatic_to_intcc(cond);
|
||||
let imm = part_to_imm64(pos, b);
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().icmp_imm(cond, c, imm);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
Operator::Select => {
|
||||
let a = part_to_value(pos, root, a).unwrap();
|
||||
let b = part_to_value(pos, root, b).unwrap();
|
||||
let c = part_to_value(pos, root, c).unwrap();
|
||||
let val = pos.ins().select(a, b, c);
|
||||
pos.func.dfg.value_def(val).unwrap_inst().into()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn instruction_to_constant(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> Option<Constant> {
|
||||
value_or_inst.to_constant(pos)
|
||||
}
|
||||
|
||||
fn instruction_result_bit_width(
|
||||
&self,
|
||||
pos: &mut FuncCursor<'b>,
|
||||
value_or_inst: ValueOrInst,
|
||||
) -> u8 {
|
||||
value_or_inst.result_bit_width(&pos.func.dfg)
|
||||
}
|
||||
|
||||
fn native_word_size_in_bits(&self, _pos: &mut FuncCursor<'b>) -> u8 {
|
||||
self.pointer_bits()
|
||||
}
|
||||
}
|
|
@ -271,42 +271,6 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
.replace(inst)
|
||||
.sload32_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Store => {
|
||||
pos.func.dfg.replace(inst).store_complex(
|
||||
info.flags,
|
||||
|
@ -341,7 +305,7 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
|
|||
}
|
||||
_ => panic!("Unsupported load or store opcode"),
|
||||
},
|
||||
InstructionData::BinaryImm64 {
|
||||
InstructionData::BinaryImm {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg,
|
||||
imm,
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
;; Apply basic simplifications.
|
||||
;;
|
||||
;; This folds constants with arithmetic to form `_imm` instructions, and other
|
||||
;; minor simplifications.
|
||||
;;
|
||||
;; Doesn't apply some simplifications if the native word width (in bytes) is
|
||||
;; smaller than the controlling type's width of the instruction. This would
|
||||
;; result in an illegal instruction that would likely be expanded back into an
|
||||
;; instruction on smaller types with the same initial opcode, creating
|
||||
;; unnecessary churn.
|
||||
|
||||
;; Binary instructions whose second argument is constant.
|
||||
(=> (when (iadd $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (sdiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sdiv_imm $C $x))
|
||||
(=> (when (udiv $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(udiv_imm $C $x))
|
||||
(=> (when (srem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(srem_imm $C $x))
|
||||
(=> (when (urem $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(urem_imm $C $x))
|
||||
(=> (when (band $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (rotl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotl_imm $C $x))
|
||||
(=> (when (rotr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(rotr_imm $C $x))
|
||||
(=> (when (ishl $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ishl_imm $C $x))
|
||||
(=> (when (ushr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ushr_imm $C $x))
|
||||
(=> (when (sshr $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(sshr_imm $C $x))
|
||||
(=> (when (isub $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $(neg $C) $x))
|
||||
(=> (when (ifcmp $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(ifcmp_imm $C $x))
|
||||
(=> (when (icmp $cond $x $C)
|
||||
(fits-in-native-word $C))
|
||||
(icmp_imm $cond $C $x))
|
||||
|
||||
;; Binary instructions whose first operand is constant.
|
||||
(=> (when (iadd $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(iadd_imm $C $x))
|
||||
(=> (when (imul $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(imul_imm $C $x))
|
||||
(=> (when (band $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(band_imm $C $x))
|
||||
(=> (when (bor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bor_imm $C $x))
|
||||
(=> (when (bxor $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(bxor_imm $C $x))
|
||||
(=> (when (isub $C $x)
|
||||
(fits-in-native-word $C))
|
||||
(irsub_imm $C $x))
|
||||
|
||||
;; Unary instructions whose operand is constant.
|
||||
(=> (adjust_sp_down $C) (adjust_sp_down_imm $C))
|
||||
|
||||
;; Fold `(binop_imm $C1 (binop_imm $C2 $x))` into `(binop_imm $(binop $C2 $C1) $x)`.
|
||||
(=> (iadd_imm $C1 (iadd_imm $C2 $x)) (iadd_imm $(iadd $C1 $C2) $x))
|
||||
(=> (imul_imm $C1 (imul_imm $C2 $x)) (imul_imm $(imul $C1 $C2) $x))
|
||||
(=> (bor_imm $C1 (bor_imm $C2 $x)) (bor_imm $(bor $C1 $C2) $x))
|
||||
(=> (band_imm $C1 (band_imm $C2 $x)) (band_imm $(band $C1 $C2) $x))
|
||||
(=> (bxor_imm $C1 (bxor_imm $C2 $x)) (bxor_imm $(bxor $C1 $C2) $x))
|
||||
|
||||
;; Remove operations that are no-ops.
|
||||
(=> (iadd_imm 0 $x) $x)
|
||||
(=> (imul_imm 1 $x) $x)
|
||||
(=> (sdiv_imm 1 $x) $x)
|
||||
(=> (udiv_imm 1 $x) $x)
|
||||
(=> (bor_imm 0 $x) $x)
|
||||
(=> (band_imm -1 $x) $x)
|
||||
(=> (bxor_imm 0 $x) $x)
|
||||
(=> (rotl_imm 0 $x) $x)
|
||||
(=> (rotr_imm 0 $x) $x)
|
||||
(=> (ishl_imm 0 $x) $x)
|
||||
(=> (ushr_imm 0 $x) $x)
|
||||
(=> (sshr_imm 0 $x) $x)
|
||||
|
||||
;; Replace with zero.
|
||||
(=> (imul_imm 0 $x) 0)
|
||||
(=> (band_imm 0 $x) 0)
|
||||
|
||||
;; Replace with negative 1.
|
||||
(=> (bor_imm -1 $x) -1)
|
||||
|
||||
;; Transform `[(x << N) >> N]` into a (un)signed-extending move.
|
||||
;;
|
||||
;; i16 -> i8 -> i16
|
||||
(=> (when (ushr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(uextend{i16} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 8 (ishl_imm 8 $x))
|
||||
(bit-width $x 16))
|
||||
(sextend{i16} (ireduce{i8} $x)))
|
||||
;; i32 -> i8 -> i32
|
||||
(=> (when (ushr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 24 (ishl_imm 24 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i8} $x)))
|
||||
;; i32 -> i16 -> i32
|
||||
(=> (when (ushr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(uextend{i32} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 16 (ishl_imm 16 $x))
|
||||
(bit-width $x 32))
|
||||
(sextend{i32} (ireduce{i16} $x)))
|
||||
;; i64 -> i8 -> i64
|
||||
(=> (when (ushr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i8} $x)))
|
||||
(=> (when (sshr_imm 56 (ishl_imm 56 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i8} $x)))
|
||||
;; i64 -> i16 -> i64
|
||||
(=> (when (ushr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i16} $x)))
|
||||
(=> (when (sshr_imm 48 (ishl_imm 48 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i16} $x)))
|
||||
;; i64 -> i32 -> i64
|
||||
(=> (when (ushr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(uextend{i64} (ireduce{i32} $x)))
|
||||
(=> (when (sshr_imm 32 (ishl_imm 32 $x))
|
||||
(bit-width $x 64))
|
||||
(sextend{i64} (ireduce{i32} $x)))
|
||||
|
||||
;; Fold away redundant `bint` instructions that accept both integer and boolean
|
||||
;; arguments.
|
||||
(=> (select (bint $x) $y $z) (select $x $y $z))
|
||||
(=> (brz (bint $x)) (brz $x))
|
||||
(=> (brnz (bint $x)) (brnz $x))
|
||||
(=> (trapz (bint $x)) (trapz $x))
|
||||
(=> (trapnz (bint $x)) (trapnz $x))
|
||||
|
||||
;; Fold comparisons into branch operations when possible.
|
||||
;;
|
||||
;; This matches against operations which compare against zero, then use the
|
||||
;; result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
;; single `brz` or `brnz`.
|
||||
(=> (brnz (icmp_imm ne 0 $x)) (brnz $x))
|
||||
(=> (brz (icmp_imm ne 0 $x)) (brz $x))
|
||||
(=> (brnz (icmp_imm eq 0 $x)) (brz $x))
|
||||
(=> (brz (icmp_imm eq 0 $x)) (brnz $x))
|
||||
|
||||
;; Division and remainder by constants.
|
||||
;;
|
||||
;; TODO: this section is incomplete, and a bunch of related optimizations are
|
||||
;; still hand-coded in `simple_preopt.rs`.
|
||||
|
||||
;; (Division by one is handled above.)
|
||||
|
||||
;; Remainder by one is zero.
|
||||
(=> (urem_imm 1 $x) 0)
|
||||
(=> (srem_imm 1 $x) 0)
|
||||
|
||||
;; Division by a power of two -> shift right.
|
||||
(=> (when (udiv_imm $C $x)
|
||||
(is-power-of-two $C))
|
||||
(ushr_imm $(log2 $C) $x))
|
Binary file not shown.
|
@ -1,393 +0,0 @@
|
|||
//! A Constant-Phi-Node removal pass.
|
||||
|
||||
use log::info;
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::EntityList;
|
||||
use crate::fx::FxHashMap;
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::Function;
|
||||
use crate::ir::{Block, Inst, Value};
|
||||
use crate::timing;
|
||||
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::vec::Vec;
|
||||
|
||||
// A note on notation. For the sake of clarity, this file uses the phrase
|
||||
// "formal parameters" to mean the `Value`s listed in the block head, and
|
||||
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
|
||||
//
|
||||
// block4(v16: i32, v18: i32): <-- formal parameters
|
||||
// ...
|
||||
// brnz v27, block7(v22, v24) <-- actual parameters
|
||||
// jump block6
|
||||
|
||||
// This transformation pass (conceptually) partitions all values in the
|
||||
// function into two groups:
|
||||
//
|
||||
// * Group A: values defined by block formal parameters, except for the entry block.
|
||||
//
|
||||
// * Group B: All other values: that is, values defined by instructions,
|
||||
// and the formals of the entry block.
|
||||
//
|
||||
// For each value in Group A, it attempts to establish whether it will have
|
||||
// the value of exactly one member of Group B. If so, the formal parameter is
|
||||
// deleted, all corresponding actual parameters (in jumps/branches to the
|
||||
// defining block) are deleted, and a rename is inserted.
|
||||
//
|
||||
// The entry block is special-cased because (1) we don't know what values flow
|
||||
// to its formals and (2) in any case we can't change its formals.
|
||||
//
|
||||
// Work proceeds in three phases.
|
||||
//
|
||||
// * Phase 1: examine all instructions. For each block, make up a useful
|
||||
// grab-bag of information, `BlockSummary`, that summarises the block's
|
||||
// formals and jump/branch instruction. This is used by Phases 2 and 3.
|
||||
//
|
||||
// * Phase 2: for each value in Group A, try to find a single Group B value
|
||||
// that flows to it. This is done using a classical iterative forward
|
||||
// dataflow analysis over a simple constant-propagation style lattice. It
|
||||
// converges quickly in practice -- I have seen at most 4 iterations. This
|
||||
// is relatively cheap because the iteration is done over the
|
||||
// `BlockSummary`s, and does not visit each instruction. The resulting
|
||||
// fixed point is stored in a `SolverState`.
|
||||
//
|
||||
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
|
||||
// remove redundant formals and actuals, and to insert suitable renames.
|
||||
//
|
||||
// Note that the effectiveness of the analysis depends on the fact that
|
||||
// there are no copy instructions in Cranelift's IR. If there were, the
|
||||
// computation of `actual_absval` in Phase 2 would have to be extended to
|
||||
// chase through such copies.
|
||||
//
|
||||
// For large functions, the analysis cost using the new AArch64 backend is about
|
||||
// 0.6% of the non-optimising compile time, as measured by instruction counts.
|
||||
// This transformation usually pays for itself several times over, though, by
|
||||
// reducing the isel/regalloc cost downstream. Gains of up to 7% have been
|
||||
// seen for large functions.
|
||||
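A hypothetical before/after sketch (not taken from this patch) of the pass's net effect, in the same notation as the comment above. Assume some value v5, defined above both jumps, is the only value ever passed as the first actual parameter of block4:

//   before:                                  after:
//     block2: ... jump block4(v5, v20)         block2: ... jump block4(v20)
//     block3: ... jump block4(v5, v21)         block3: ... jump block4(v21)
//     block4(v16: i32, v18: i32): ...          block4(v18: i32): ...
//
//   The redundant formal v16 is removed, the matching actuals are dropped from
//   each jump/branch, and v16 becomes an alias of v5.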
|
||||
// The `Value`s (Group B) that can flow to a formal parameter (Group A).
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
enum AbstractValue {
|
||||
// Two or more values flow to this formal.
|
||||
Many,
|
||||
// Exactly one value, as stated, flows to this formal. The `Value`s that
|
||||
// can appear here are exactly: `Value`s defined by `Inst`s, plus the
|
||||
// `Value`s defined by the formals of the entry block. Note that this is
|
||||
// exactly the set of `Value`s that are *not* tracked in the solver below
|
||||
// (see `SolverState`).
|
||||
One(Value /*Group B*/),
|
||||
// No value flows to this formal.
|
||||
None,
|
||||
}
|
||||
|
||||
impl AbstractValue {
|
||||
fn join(self, other: AbstractValue) -> AbstractValue {
|
||||
match (self, other) {
|
||||
// Joining with `None` has no effect
|
||||
(AbstractValue::None, p2) => p2,
|
||||
(p1, AbstractValue::None) => p1,
|
||||
// Joining with `Many` produces `Many`
|
||||
(AbstractValue::Many, _p2) => AbstractValue::Many,
|
||||
(_p1, AbstractValue::Many) => AbstractValue::Many,
|
||||
// The only interesting case
|
||||
(AbstractValue::One(v1), AbstractValue::One(v2)) => {
|
||||
if v1 == v2 {
|
||||
AbstractValue::One(v1)
|
||||
} else {
|
||||
AbstractValue::Many
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn is_one(self) -> bool {
|
||||
if let AbstractValue::One(_) = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
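A quick illustration of the lattice implemented by `join` above, using hypothetical values with v1 != v2:

// None.join(One(v1))    == One(v1)   // None is the identity
// One(v1).join(One(v1)) == One(v1)   // agreeing values stay One
// One(v1).join(One(v2)) == Many      // disagreeing values collapse to Many
// Many.join(anything)   == Many      // Many absorbs everything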
|
||||
// For some block, a useful bundle of info. The `Block` itself is not stored
|
||||
// here since it will be the key in the associated `FxHashMap` -- see
|
||||
// `summaries` below. For the `SmallVec` tuning params: most blocks have
|
||||
// few parameters, hence `4`. And almost all blocks have either one or two
|
||||
// successors, hence `2`.
|
||||
#[derive(Debug)]
|
||||
struct BlockSummary {
|
||||
// Formal parameters for this `Block`
|
||||
formals: SmallVec<[Value; 4] /*Group A*/>,
|
||||
// For each `Inst` in this block that transfers to another block: the
|
||||
// `Inst` itself, the destination `Block`, and the actual parameters
|
||||
// passed. We don't bother to include transfers that pass zero parameters
|
||||
// since that makes more work for the solver for no purpose.
|
||||
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
|
||||
}
|
||||
impl BlockSummary {
|
||||
fn new(formals: SmallVec<[Value; 4]>) -> Self {
|
||||
Self {
|
||||
formals,
|
||||
dests: smallvec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Solver state. This holds an AbstractValue for each formal parameter, except
|
||||
// for those from the entry block.
|
||||
struct SolverState {
|
||||
absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
|
||||
}
|
||||
impl SolverState {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
absvals: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
fn get(&self, actual: Value) -> AbstractValue {
|
||||
match self.absvals.get(&actual) {
|
||||
Some(lp) => *lp,
|
||||
None => panic!("SolverState::get: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
|
||||
self.absvals.get(&actual)
|
||||
}
|
||||
fn set(&mut self, actual: Value, lp: AbstractValue) {
|
||||
match self.absvals.insert(actual, lp) {
|
||||
Some(_old_lp) => {}
|
||||
None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect phis in `func` that will only ever produce one value, using a
|
||||
/// classic forward dataflow analysis. Then remove them.
|
||||
#[inline(never)]
|
||||
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
|
||||
let _tt = timing::remove_constant_phis();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
// Get the blocks, in reverse postorder
|
||||
let mut blocks_reverse_postorder = Vec::<Block>::new();
|
||||
for block in domtree.cfg_postorder() {
|
||||
blocks_reverse_postorder.push(*block);
|
||||
}
|
||||
blocks_reverse_postorder.reverse();
|
||||
|
||||
// Phase 1 of 3: for each block, make a summary containing all relevant
|
||||
// info. The solver will iterate over the summaries, rather than having
|
||||
// to inspect each instruction in each block.
|
||||
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
let formals = func.dfg.block_params(*b);
|
||||
let mut summary = BlockSummary::new(SmallVec::from(formals));
|
||||
|
||||
for inst in func.layout.block_insts(*b) {
|
||||
let idetails = &func.dfg[inst];
|
||||
// Note that multi-dest transfers (i.e., branch tables) don't
|
||||
// carry parameters in our IR, so we only have to care about
|
||||
// `SingleDest` here.
|
||||
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
|
||||
{
|
||||
let inst_var_args = func.dfg.inst_variable_args(inst);
|
||||
// Skip branches/jumps that carry no params.
|
||||
if inst_var_args.len() > 0 {
|
||||
let mut actuals = SmallVec::<[Value; 4]>::new();
|
||||
for arg in inst_var_args {
|
||||
let arg = func.dfg.resolve_aliases(*arg);
|
||||
actuals.push(arg);
|
||||
}
|
||||
summary.dests.push((inst, dest, actuals));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the invariant that all blocks (except for the entry) appear
|
||||
// in the summary, *unless* they have neither formals nor any
|
||||
// param-carrying branches/jumps.
|
||||
if formals.len() > 0 || summary.dests.len() > 0 {
|
||||
summaries.insert(*b, summary);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2 of 3: iterate over the summaries in reverse postorder,
|
||||
// computing new `AbstractValue`s for each tracked `Value`. The set of
|
||||
// tracked `Value`s is exactly Group A as described above.
|
||||
|
||||
let entry_block = func
|
||||
.layout
|
||||
.entry_block()
|
||||
.expect("remove_constant_phis: entry block unknown");
|
||||
|
||||
// Set up initial solver state
|
||||
let mut state = SolverState::new();
|
||||
|
||||
for b in &blocks_reverse_postorder {
|
||||
// For each block, get the formals
|
||||
if *b == entry_block {
|
||||
continue;
|
||||
}
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
|
||||
assert!(mb_old_absval.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
// Solve: repeatedly traverse the blocks in reverse postorder, until there
|
||||
// are no changes.
|
||||
let mut iter_no = 0;
|
||||
loop {
|
||||
iter_no += 1;
|
||||
let mut changed = false;
|
||||
|
||||
for src in &blocks_reverse_postorder {
|
||||
let mb_src_summary = summaries.get(src);
|
||||
// The src block might have no summary. This means it has no
|
||||
// branches/jumps that carry parameters *and* it doesn't take any
|
||||
// parameters itself. Phase 1 ensures this. So we can ignore it.
|
||||
if mb_src_summary.is_none() {
|
||||
continue;
|
||||
}
|
||||
let src_summary = mb_src_summary.unwrap();
|
||||
for (_inst, dst, src_actuals) in &src_summary.dests {
|
||||
assert!(*dst != entry_block);
|
||||
// By contrast, the dst block must have a summary. Phase 1
|
||||
// will have only included an entry in `src_summary.dests` if
|
||||
// that branch/jump carried at least one parameter. So the
|
||||
// dst block does take parameters, so it must have a summary.
|
||||
let dst_summary = summaries
|
||||
.get(dst)
|
||||
.expect("remove_constant_phis: dst block has no summary");
|
||||
let dst_formals = &dst_summary.formals;
|
||||
assert!(src_actuals.len() == dst_formals.len());
|
||||
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
|
||||
// Find the abstract value for `actual`. If it is a block
|
||||
// formal parameter then the most recent abstract value is
|
||||
// to be found in the solver state. If not, then it's a
|
||||
// real value defining point (not a phi), in which case
|
||||
// return it itself.
|
||||
let actual_absval = match state.maybe_get(*actual) {
|
||||
Some(pt) => *pt,
|
||||
None => AbstractValue::One(*actual),
|
||||
};
|
||||
|
||||
// And `join` the new value with the old.
|
||||
let formal_absval_old = state.get(*formal);
|
||||
let formal_absval_new = formal_absval_old.join(actual_absval);
|
||||
if formal_absval_new != formal_absval_old {
|
||||
changed = true;
|
||||
state.set(*formal, formal_absval_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !changed {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let mut n_consts = 0;
|
||||
for absval in state.absvals.values() {
|
||||
if absval.is_one() {
|
||||
n_consts += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3 of 3: edit the function to remove constant formals, using the
|
||||
// summaries and the final solver state as a guide.
|
||||
|
||||
// Make up a set of blocks that need editing.
|
||||
let mut need_editing = FxHashSet::<Block>::default();
|
||||
for (block, summary) in &summaries {
|
||||
if *block == entry_block {
|
||||
continue;
|
||||
}
|
||||
for formal in &summary.formals {
|
||||
let formal_absval = state.get(*formal);
|
||||
if formal_absval.is_one() {
|
||||
need_editing.insert(*block);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Firstly, deal with the formals. For each formal which is redundant,
|
||||
// remove it, and also add a reroute from it to the constant value which
|
||||
// we know it to be.
|
||||
for b in &need_editing {
|
||||
let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
|
||||
let formals: &[Value] = func.dfg.block_params(*b);
|
||||
for formal in formals {
|
||||
// The state must give an absval for `formal`.
|
||||
if let AbstractValue::One(replacement_val) = state.get(*formal) {
|
||||
del_these.push((*formal, replacement_val));
|
||||
}
|
||||
}
|
||||
// We can delete the formals in any order. However,
|
||||
// `remove_block_param` works by sliding backwards all arguments to
|
||||
// the right of the one it is asked to delete. Hence when removing more
|
||||
// than one formal, it is significantly more efficient to ask it to
|
||||
// remove the rightmost formal first, and hence this `reverse`.
|
||||
del_these.reverse();
|
||||
for (redundant_formal, replacement_val) in del_these {
|
||||
func.dfg.remove_block_param(redundant_formal);
|
||||
func.dfg.change_to_alias(redundant_formal, replacement_val);
|
||||
}
|
||||
}
|
||||
|
||||
// Secondly, visit all branch insns. If the destination has had its
|
||||
// formals changed, change the actuals accordingly. Don't scan all insns,
|
||||
// rather just visit those as listed in the summaries we prepared earlier.
|
||||
for (_src_block, summary) in &summaries {
|
||||
for (inst, dst_block, _src_actuals) in &summary.dests {
|
||||
if !need_editing.contains(dst_block) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
|
||||
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
|
||||
let num_fixed_actuals = func.dfg[*inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let dst_summary = summaries.get(&dst_block).unwrap();
|
||||
|
||||
// Check that the numbers of arguments make sense.
|
||||
assert!(num_fixed_actuals <= num_old_actuals);
|
||||
assert!(num_fixed_actuals + dst_summary.formals.len() == num_old_actuals);
|
||||
|
||||
// Create a new value list.
|
||||
let mut new_actuals = EntityList::<Value>::new();
|
||||
// Copy the fixed args to the new list
|
||||
for i in 0..num_fixed_actuals {
|
||||
let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
|
||||
new_actuals.push(val, &mut func.dfg.value_lists);
|
||||
}
|
||||
|
||||
// Copy the variable args (the actual block params) to the new
|
||||
// list, filtering out redundant ones.
|
||||
for i in 0..dst_summary.formals.len() {
|
||||
let actual_i = old_actuals
|
||||
.get(num_fixed_actuals + i, &func.dfg.value_lists)
|
||||
.unwrap();
|
||||
let formal_i = dst_summary.formals[i];
|
||||
let is_redundant = state.get(formal_i).is_one();
|
||||
if !is_redundant {
|
||||
new_actuals.push(actual_i, &mut func.dfg.value_lists);
|
||||
}
|
||||
}
|
||||
func.dfg[*inst].put_value_list(new_actuals);
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",
|
||||
iter_no,
|
||||
state.absvals.len(),
|
||||
n_consts
|
||||
);
|
||||
}
|
|
@ -10,8 +10,10 @@ use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
|||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{
|
||||
condcodes::{CondCode, IntCC},
|
||||
instructions::Opcode,
|
||||
types::{I32, I64},
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{I16, I32, I64, I8},
|
||||
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
|
@ -142,7 +144,7 @@ fn package_up_divrem_info(
|
|||
/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
|
||||
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
|
||||
if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
|
||||
let (is_signed, is_rem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
|
@ -466,6 +468,340 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
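A worked instance of the fold above, written as schematic Cranelift IR with hypothetical value names. With a 32-bit controlling type and a shift amount of 24, the reduce width is 32 - 24 = 8:

// v1 = ishl_imm v0, 24
// v2 = ushr_imm v1, 24
//          ==>
// v3 = ireduce.i8  v0
// v2 = uextend.i32 v3      // an sshr_imm pair would instead be rewritten to sextend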
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode && ty == pos.func.dfg.ctrl_typevar(arg_inst) {
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
|
||||
match (opcode, imm.into()) {
|
||||
(Opcode::IaddImm, 0)
|
||||
| (Opcode::ImulImm, 1)
|
||||
| (Opcode::SdivImm, 1)
|
||||
| (Opcode::UdivImm, 1)
|
||||
| (Opcode::BorImm, 0)
|
||||
| (Opcode::BandImm, -1)
|
||||
| (Opcode::BxorImm, 0)
|
||||
| (Opcode::RotlImm, 0)
|
||||
| (Opcode::RotrImm, 0)
|
||||
| (Opcode::IshlImm, 0)
|
||||
| (Opcode::UshrImm, 0)
|
||||
| (Opcode::SshrImm, 0) => {
|
||||
// Alias the result value with the original argument.
|
||||
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
|
||||
}
|
||||
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
|
||||
// Replace by zero.
|
||||
pos.func.dfg.replace(inst).iconst(ty, 0);
|
||||
}
|
||||
(Opcode::BorImm, -1) => {
|
||||
// Replace by minus one.
|
||||
pos.func.dfg.replace(inst).iconst(ty, -1);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::IntCompare { opcode, cond, args } => {
|
||||
debug_assert_eq!(opcode, Opcode::Icmp);
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
|
||||
pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::CondTrap { .. }
|
||||
| InstructionData::Branch { .. }
|
||||
| InstructionData::Ternary {
|
||||
opcode: Opcode::Select,
|
||||
..
|
||||
} => {
|
||||
// Fold away a redundant `bint`.
|
||||
let condition_def = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
pos.func.dfg.value_def(args[0])
|
||||
};
|
||||
if let ValueDef::Result(def_inst, _) = condition_def {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Bint,
|
||||
arg: bool_val,
|
||||
} = pos.func.dfg[def_inst]
|
||||
{
|
||||
let args = pos.func.dfg.inst_args_mut(inst);
|
||||
args[0] = bool_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
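A schematic sketch of what the `Binary` arm of `simplify` above does (hypothetical values; the same rewrites appear in the rule list earlier in this patch):

// v1 = iconst.i32 7
// v2 = iadd v0, v1      ==>   v2 = iadd_imm v0, 7
// v3 = isub v0, v1      ==>   v3 = iadd_imm v0, -7   // the immediate is negated
//
// The recursive call then lets the BinaryImm arm clean up no-ops, e.g.
// iadd_imm v, 0 becomes an alias of v.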
|
||||
struct BranchOptInfo {
|
||||
br_inst: Inst,
|
||||
cmp_arg: Value,
|
||||
args: ValueList,
|
||||
new_opcode: Opcode,
|
||||
}
|
||||
|
||||
/// Fold comparisons into branch operations when possible.
|
||||
///
|
||||
/// This matches against operations which compare against zero, then use the
|
||||
/// result in a `brz` or `brnz` branch. It folds those two operations into a
|
||||
/// single `brz` or `brnz`.
|
||||
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
|
||||
let mut info = if let InstructionData::Branch {
|
||||
opcode: br_opcode,
|
||||
args: ref br_args,
|
||||
..
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let first_arg = {
|
||||
let args = pos.func.dfg.inst_args(inst);
|
||||
args[0]
|
||||
};
|
||||
|
||||
let icmp_inst = if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
icmp_inst
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let InstructionData::IntCompareImm {
|
||||
opcode: Opcode::IcmpImm,
|
||||
arg: cmp_arg,
|
||||
cond: cmp_cond,
|
||||
imm: cmp_imm,
|
||||
} = pos.func.dfg[icmp_inst]
|
||||
{
|
||||
let cmp_imm: i64 = cmp_imm.into();
|
||||
if cmp_imm != 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// icmp_imm returns non-zero when the comparison is true. So, if
|
||||
// we're branching on zero, we need to invert the condition.
|
||||
let cond = match br_opcode {
|
||||
Opcode::Brz => cmp_cond.inverse(),
|
||||
Opcode::Brnz => cmp_cond,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let new_opcode = match cond {
|
||||
IntCC::Equal => Opcode::Brz,
|
||||
IntCC::NotEqual => Opcode::Brnz,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
BranchOptInfo {
|
||||
br_inst: inst,
|
||||
cmp_arg,
|
||||
args: br_args.clone(),
|
||||
new_opcode,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
|
||||
if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
|
||||
*opcode = info.new_opcode;
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
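A schematic sketch of the rewrite performed by `branch_opt` above (hypothetical values), matching the branch rules listed earlier in this patch:

// v1 = icmp_imm ne v0, 0
// brnz v1, block2(...)      ==>   brnz v0, block2(...)
//
// v1 = icmp_imm eq v0, 0
// brnz v1, block2(...)      ==>   brz  v0, block2(...)   // condition inverted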
|
||||
enum BranchOrderKind {
|
||||
BrzToBrnz(Value),
|
||||
BrnzToBrz(Value),
|
||||
|
@ -608,490 +944,15 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block,
|
|||
cfg.recompute_block(pos.func, block);
|
||||
}
|
||||
|
||||
#[cfg(feature = "enable-peepmatic")]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::peepmatic::ValueOrInst;
|
||||
|
||||
pub type PeepholeOptimizer<'a, 'b> =
|
||||
peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
crate::peepmatic::preopt(isa)
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
_native_word_width: u32,
|
||||
) {
|
||||
// After we apply one optimization, that might make another
|
||||
// optimization applicable. Keep running the peephole optimizer
|
||||
// until either:
|
||||
//
|
||||
// * No optimization applied, and therefore it doesn't make sense to
|
||||
// try again, because no optimization will apply again.
|
||||
//
|
||||
// * Or when we replaced an instruction with an alias to an existing
|
||||
// value, because we already ran the peephole optimizer over the
|
||||
// aliased value's instruction in an early part of the traversal
|
||||
// over the function.
|
||||
while let Some(ValueOrInst::Inst(new_inst)) =
|
||||
optimizer.apply_one(pos, ValueOrInst::Inst(inst))
|
||||
{
|
||||
// We transplanted a new instruction into the current
|
||||
// instruction, so the "new" instruction is actually the same
|
||||
// one, just with different data.
|
||||
debug_assert_eq!(new_inst, inst);
|
||||
}
|
||||
debug_assert_eq!(pos.current_inst(), Some(inst));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "enable-peepmatic"))]
|
||||
mod simplify {
|
||||
use super::*;
|
||||
use crate::ir::{
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{B8, I16, I32, I8},
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct PeepholeOptimizer<'a, 'b> {
|
||||
phantom: PhantomData<(&'a (), &'b ())>,
|
||||
}
|
||||
|
||||
pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
|
||||
PeepholeOptimizer {
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_all<'a, 'b>(
|
||||
_optimizer: &mut PeepholeOptimizer<'a, 'b>,
|
||||
pos: &mut FuncCursor<'a>,
|
||||
inst: Inst,
|
||||
native_word_width: u32,
|
||||
) {
|
||||
simplify(pos, inst, native_word_width);
|
||||
branch_opt(pos, inst);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
|
||||
if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
|
||||
if let InstructionData::UnaryImm {
|
||||
opcode: Opcode::Iconst,
|
||||
imm,
|
||||
} = dfg[candidate_inst]
|
||||
{
|
||||
return Some(imm);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
|
||||
/// Returns true if the final instruction has been converted to such a move.
|
||||
fn try_fold_extended_move(
|
||||
pos: &mut FuncCursor,
|
||||
inst: Inst,
|
||||
opcode: Opcode,
|
||||
arg: Value,
|
||||
imm: immediates::Imm64,
|
||||
) -> bool {
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IshlImm,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if imm != *prev_imm {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dest_ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let imm_bits: i64 = imm.into();
|
||||
let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
|
||||
8 => I8,
|
||||
16 => I16,
|
||||
32 => I32,
|
||||
_ => return false,
|
||||
};
|
||||
let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
|
||||
|
||||
// This becomes a no-op, since ireduce_ty has a smaller lane width than
|
||||
// the argument type (also the destination type).
|
||||
let arg = *prev_arg;
|
||||
let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
|
||||
|
||||
if opcode == Opcode::UshrImm {
|
||||
pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Apply basic simplifications.
|
||||
///
|
||||
/// This folds constants with arithmetic to form `_imm` instructions, and other minor
|
||||
/// simplifications.
|
||||
///
|
||||
/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
|
||||
/// controlling type's width of the instruction. This would result in an illegal instruction that
|
||||
/// would likely be expanded back into an instruction on smaller types with the same initial
|
||||
/// opcode, creating unnecessary churn.
|
||||
fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
|
||||
match pos.func.dfg[inst] {
|
||||
InstructionData::Binary { opcode, args } => {
|
||||
if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Sdiv => Opcode::SdivImm,
|
||||
Opcode::Udiv => Opcode::UdivImm,
|
||||
Opcode::Srem => Opcode::SremImm,
|
||||
Opcode::Urem => Opcode::UremImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Rotl => Opcode::RotlImm,
|
||||
Opcode::Rotr => Opcode::RotrImm,
|
||||
Opcode::Ishl => Opcode::IshlImm,
|
||||
Opcode::Ushr => Opcode::UshrImm,
|
||||
Opcode::Sshr => Opcode::SshrImm,
|
||||
Opcode::Isub => {
|
||||
imm = imm.wrapping_neg();
|
||||
Opcode::IaddImm
|
||||
}
|
||||
Opcode::Ifcmp => Opcode::IfcmpImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[0]);
|
||||
|
||||
// Repeat for BinaryImm simplification.
|
||||
simplify(pos, inst, native_word_width);
|
||||
}
|
||||
} else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
|
||||
let new_opcode = match opcode {
|
||||
Opcode::Iadd => Opcode::IaddImm,
|
||||
Opcode::Imul => Opcode::ImulImm,
|
||||
Opcode::Band => Opcode::BandImm,
|
||||
Opcode::Bor => Opcode::BorImm,
|
||||
Opcode::Bxor => Opcode::BxorImm,
|
||||
Opcode::Isub => Opcode::IrsubImm,
|
||||
_ => return,
|
||||
};
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
if ty.bytes() <= native_word_width {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(new_opcode, ty, imm, args[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Unary { opcode, arg } => {
|
||||
if let Opcode::AdjustSpDown = opcode {
|
||||
if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
|
||||
// Note this works for both positive and negative immediate values.
|
||||
pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::BinaryImm64 { opcode, arg, imm } => {
|
||||
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||
|
||||
let mut arg = arg;
|
||||
let mut imm = imm;
|
||||
match opcode {
|
||||
Opcode::IaddImm
|
||||
| Opcode::ImulImm
|
||||
| Opcode::BorImm
|
||||
| Opcode::BandImm
|
||||
| Opcode::BxorImm => {
|
||||
// Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
|
||||
if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
|
||||
if let InstructionData::BinaryImm64 {
|
||||
opcode: prev_opcode,
|
||||
arg: prev_arg,
|
||||
imm: prev_imm,
|
||||
} = &pos.func.dfg[arg_inst]
|
||||
{
|
||||
if opcode == *prev_opcode
|
||||
&& ty == pos.func.dfg.ctrl_typevar(arg_inst)
|
||||
{
|
||||
let lhs: i64 = imm.into();
|
||||
let rhs: i64 = (*prev_imm).into();
|
||||
let new_imm = match opcode {
|
||||
Opcode::BorImm => lhs | rhs,
|
||||
Opcode::BandImm => lhs & rhs,
|
||||
Opcode::BxorImm => lhs ^ rhs,
|
||||
Opcode::IaddImm => lhs.wrapping_add(rhs),
|
||||
Opcode::ImulImm => lhs.wrapping_mul(rhs),
|
||||
_ => panic!("can't happen"),
|
||||
};
|
||||
let new_imm = immediates::Imm64::from(new_imm);
|
||||
let new_arg = *prev_arg;
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.BinaryImm64(opcode, ty, new_imm, new_arg);
|
||||
imm = new_imm;
|
||||
arg = new_arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::UshrImm | Opcode::SshrImm => {
|
||||
if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
|
||||
&& try_fold_extended_move(pos, inst, opcode, arg, imm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// Replace operations that are no-ops.
            match (opcode, imm.into()) {
                (Opcode::IaddImm, 0)
                | (Opcode::ImulImm, 1)
                | (Opcode::SdivImm, 1)
                | (Opcode::UdivImm, 1)
                | (Opcode::BorImm, 0)
                | (Opcode::BandImm, -1)
                | (Opcode::BxorImm, 0)
                | (Opcode::RotlImm, 0)
                | (Opcode::RotrImm, 0)
                | (Opcode::IshlImm, 0)
                | (Opcode::UshrImm, 0)
                | (Opcode::SshrImm, 0) => {
                    // Alias the result value with the original argument.
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
                }
                (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
                    // Replace by zero.
                    pos.func.dfg.replace(inst).iconst(ty, 0);
                }
                (Opcode::BorImm, -1) => {
                    // Replace by minus one.
                    pos.func.dfg.replace(inst).iconst(ty, -1);
                }
                _ => {}
            }
        }

        InstructionData::IntCompare { opcode, cond, args } => {
            debug_assert_eq!(opcode, Opcode::Icmp);
            if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                }
            }
        }

        InstructionData::CondTrap { .. }
        | InstructionData::Branch { .. }
        | InstructionData::Ternary {
            opcode: Opcode::Select,
            ..
        } => {
            // Fold away a redundant `bint`.
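            // These consumers accept a boolean condition directly, so when the condition was
            // produced by `bint` (bool -> int) we can branch/trap/select on the original boolean.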
            let condition_def = {
                let args = pos.func.dfg.inst_args(inst);
                pos.func.dfg.value_def(args[0])
            };
            if let ValueDef::Result(def_inst, _) = condition_def {
                if let InstructionData::Unary {
                    opcode: Opcode::Bint,
                    arg: bool_val,
                } = pos.func.dfg[def_inst]
                {
                    let args = pos.func.dfg.inst_args_mut(inst);
                    args[0] = bool_val;
                }
            }
        }

        InstructionData::Ternary {
            opcode: Opcode::Bitselect,
            args,
        } => {
            let old_cond_type = pos.func.dfg.value_type(args[0]);
            if !old_cond_type.is_vector() {
                return;
            }

            // Replace bitselect with vselect if each lane of controlling mask is either
            // all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
            // while vselect can be encoded using single BLEND instruction.
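            // Concretely, `bitselect(c, x, y)` takes bits from `x` where `c` has ones and
            // from `y` where it has zeroes, so a mask whose lanes are all-ones or all-zeroes
            // behaves exactly like a per-lane `vselect`.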
            if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
                    InstructionData::Unary {
                        opcode: Opcode::RawBitcast,
                        arg,
                    } => {
                        // If controlling mask is raw-bitcasted boolean vector then
                        // we know each lane is either all zeroes or ones,
                        // so we can use vselect instruction instead.
                        let arg_type = pos.func.dfg.value_type(arg);
                        if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
                            return;
                        }
                        (arg, arg_type)
                    }
                    InstructionData::UnaryConst {
                        opcode: Opcode::Vconst,
                        constant_handle,
                    } => {
                        // If each byte of controlling mask is 0x00 or 0xFF then
                        // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
                        // Bitselect operates at bit level, so the lane types don't matter.
                        let const_data = pos.func.dfg.constants.get(constant_handle);
                        if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
                            return;
                        }
                        let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
                        (pos.ins().raw_bitcast(new_type, args[0]), new_type)
                    }
                    _ => return,
                };

                let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
                let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
                let old_arg_type = pos.func.dfg.value_type(args[1]);

                if arg_type != old_arg_type {
                    // Operands types must match, we need to add bitcasts.
                    let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
                    let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
                    let ret = pos.ins().vselect(cond_val, arg1, arg2);
                    pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
                } else {
                    pos.func
                        .dfg
                        .replace(inst)
                        .vselect(cond_val, args[1], args[2]);
                }
            }
        }

        _ => {}
    }
}

struct BranchOptInfo {
    br_inst: Inst,
    cmp_arg: Value,
    args: ValueList,
    new_opcode: Opcode,
}

/// Fold comparisons into branch operations when possible.
///
/// This matches against operations which compare against zero, then use the
/// result in a `brz` or `brnz` branch. It folds those two operations into a
/// single `brz` or `brnz`.
fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
    let mut info = if let InstructionData::Branch {
        opcode: br_opcode,
        args: ref br_args,
        ..
    } = pos.func.dfg[inst]
    {
        let first_arg = {
            let args = pos.func.dfg.inst_args(inst);
            args[0]
        };

        let icmp_inst =
            if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
                icmp_inst
            } else {
                return;
            };

        if let InstructionData::IntCompareImm {
            opcode: Opcode::IcmpImm,
            arg: cmp_arg,
            cond: cmp_cond,
            imm: cmp_imm,
        } = pos.func.dfg[icmp_inst]
        {
            let cmp_imm: i64 = cmp_imm.into();
            if cmp_imm != 0 {
                return;
            }

            // icmp_imm returns non-zero when the comparison is true. So, if
            // we're branching on zero, we need to invert the condition.
            let cond = match br_opcode {
                Opcode::Brz => cmp_cond.inverse(),
                Opcode::Brnz => cmp_cond,
                _ => return,
            };

            let new_opcode = match cond {
                IntCC::Equal => Opcode::Brz,
                IntCC::NotEqual => Opcode::Brnz,
                _ => return,
            };

            BranchOptInfo {
                br_inst: inst,
                cmp_arg,
                args: br_args.clone(),
                new_opcode,
            }
        } else {
            return;
        }
    } else {
        return;
    };

    info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
    if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
        *opcode = info.new_opcode;
    } else {
        panic!();
    }
}
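
A plain-Rust sanity check of the fold above (an illustration only, not part of this patch; the function names are made up): `brz` is taken when its condition value is zero, so `brz` of `v1 = icmp_imm eq x, 0` fires exactly when `x` is non-zero, which is what the rewritten `brnz x` tests.

    // Illustration only: mirrors the brz/icmp_imm fold in plain Rust.
    fn original_branch_taken(x: i64) -> bool {
        let v1 = (x == 0) as i64; // v1 = icmp_imm eq x, 0
        v1 == 0 // brz v1 -- taken when v1 is zero
    }

    fn folded_branch_taken(x: i64) -> bool {
        x != 0 // brnz x
    }

    #[test]
    fn branch_fold_is_equivalent() {
        for x in [-3i64, 0, 1, 42] {
            assert_eq!(original_branch_taken(x), folded_branch_taken(x));
        }
    }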

/// The main pre-opt pass.
pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
    let _tt = timing::preopt();

    let mut pos = FuncCursor::new(func);
    let native_word_width = isa.pointer_bytes() as u32;
    let mut optimizer = simplify::peephole_optimizer(isa);

    let native_word_width = isa.pointer_bytes();
    while let Some(block) = pos.next_block() {
        while let Some(inst) = pos.next_inst() {
            simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
            // Apply basic simplifications.
            simplify(&mut pos, inst, native_word_width as u32);

            // Try to transform divide-by-constant into simpler operations.
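            // For instance, an unsigned divide by a power of two can be rewritten as a
            // single `ushr_imm`; other constant divisors typically become a
            // multiply-by-magic-constant sequence rather than a hardware divide.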
            if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {

@ -1099,6 +960,7 @@ pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn Targ
                continue;
            }

            branch_opt(&mut pos, inst);
            branch_order(&mut pos, cfg, block, inst);
        }
    }

@ -62,7 +62,6 @@ define_passes! {
    gvn: "Global value numbering",
    licm: "Loop invariant code motion",
    unreachable_code: "Remove unreachable blocks",
    remove_constant_phis: "Remove constant phi-nodes",

    regalloc: "Register allocation",
    ra_liveness: "RA liveness analysis",

@ -18,9 +18,9 @@ use serde::{Deserialize, Serialize};
pub struct ValueLocRange {
    /// The ValueLoc containing a ValueLabel during this range.
    pub loc: ValueLoc,
    /// The start of the range. It is an offset in the generated code.
    /// The start of the range.
    pub start: u32,
    /// The end of the range. It is an offset in the generated code.
    /// The end of the range.
    pub end: u32,
}

@ -91,11 +91,6 @@ pub fn build_value_labels_ranges<T>(
where
    T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
{
    // FIXME(#1523): New-style backend does not yet have debug info.
    if isa.get_mach_backend().is_some() {
        return HashMap::new();
    }

    let values_labels = build_value_labels_index::<T>(func);

    let mut blocks = func.layout.blocks().collect::<Vec<_>>();

@ -756,10 +756,10 @@ impl<'a> Verifier<'a> {
            | UnaryIeee64 { .. }
            | UnaryBool { .. }
            | Binary { .. }
            | BinaryImm8 { .. }
            | BinaryImm64 { .. }
            | BinaryImm { .. }
            | Ternary { .. }
            | TernaryImm8 { .. }
            | InsertLane { .. }
            | ExtractLane { .. }
            | Shuffle { .. }
            | IntCompare { .. }
            | IntCompareImm { .. }

@ -1912,20 +1912,20 @@ impl<'a> Verifier<'a> {
                    Ok(())
                }
            }
            ir::InstructionData::BinaryImm8 {
            ir::InstructionData::ExtractLane {
                opcode: ir::instructions::Opcode::Extractlane,
                imm: lane,
                lane,
                arg,
                ..
            }
            | ir::InstructionData::TernaryImm8 {
            | ir::InstructionData::InsertLane {
                opcode: ir::instructions::Opcode::Insertlane,
                imm: lane,
                lane,
                args: [arg, _],
                ..
            } => {
                // We must be specific about the opcodes above because other instructions are using
                // the same formats.
                // the ExtractLane/InsertLane formats.
                let ty = self.func.dfg.value_type(arg);
                if u16::from(lane) >= ty.lane_count() {
                    errors.fatal((

@ -508,8 +508,7 @@ pub fn write_operands(
            constant_handle, ..
        } => write!(w, " {}", constant_handle),
        Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]),
        BinaryImm8 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        BinaryImm64 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
        Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]),
        MultiAry { ref args, .. } => {
            if args.is_empty() {

@ -519,7 +518,8 @@ pub fn write_operands(
            }
        }
        NullAry { .. } => write!(w, " "),
        TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm),
        InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
        ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
        Shuffle { mask, args, .. } => {
            let data = dfg.immediates.get(mask).expect(
                "Expected the shuffle mask to already be inserted into the immediates table",

@ -1 +1 @@
{"files":{"Cargo.toml":"c4ee5d42f3f76a1458ec0d97b5777569906819fe5b4002512de0e69814754c53","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"5ecb434f18c343f68c7080514c71f8c79c21952d1774beffa1bf348b6dd77b05","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"d931ba5ce07a5c77c8a62bb07316db21c101bc3fa1eb6ffd396f8a8944958185","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}
{"files":{"Cargo.toml":"cd1dd7e4040349ff8e5e88cbc3273c2b52cb411853933de6aea8976a1a99445f","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"96ceffbfd88fb06e3b41aa4d3087cffbbf8441d04eba7ab09662a72ab600a321","src/boxed_slice.rs":"69d539b72460c0aba1d30e0b72efb0c29d61558574d751c784794e14abf41352","src/iter.rs":"4a4d3309fe9aad14fd7702f02459f4277b4ddb50dba700e58dcc75665ffebfb3","src/keys.rs":"b8c2fba26dee15bf3d1880bb2b41e8d66fe1428d242ee6d9fd30ee94bbd0407d","src/lib.rs":"f6d738a46f1dca8b0c82a5910d86cd572a3585ab7ef9f73dac96962529069190","src/list.rs":"4bf609eb7cc7c000c18da746596d5fcc67eece3f919ee2d76e19f6ac371640d1","src/map.rs":"546b36be4cbbd2423bacbed69cbe114c63538c3f635e15284ab8e4223e717705","src/packed_option.rs":"dccb3dd6fc87eba0101de56417f21cab67a4394831df9fa41e3bbddb70cdf694","src/primary.rs":"30d5e2ab8427fd2b2c29da395812766049e3c40845cc887af3ee233dba91a063","src/set.rs":"b040054b8baa0599e64df9ee841640688e2a73b6eabbdc5a4f15334412db052a","src/sparse.rs":"536e31fdcf64450526f5e5b85e97406c26b998bc7e0d8161b6b449c24265449f"},"package":null}

@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.64.0"
version = "0.63.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

@ -85,10 +85,6 @@ macro_rules! entity_impl {
            fn reserved_value() -> $entity {
                $entity($crate::__core::u32::MAX)
            }

            fn is_reserved_value(&self) -> bool {
                self.0 == $crate::__core::u32::MAX
            }
        }

        impl $entity {

@ -11,11 +11,9 @@ use core::fmt;
use core::mem;

/// Types that have a reserved value which can't be created any other way.
pub trait ReservedValue {
pub trait ReservedValue: Eq {
    /// Create an instance of the reserved value.
    fn reserved_value() -> Self;
    /// Checks whether value is the reserved one.
    fn is_reserved_value(&self) -> bool;
}
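
For context, a standalone sketch (illustrative only, not part of this diff; the `Slot` type is made up) of the sentinel idea both versions of the trait encode: `None` is represented by the type's reserved value, and the surrounding hunks merely switch between calling `is_reserved_value()` and comparing against `reserved_value()` under the restored `Eq` bound.

    // Sketch of a ReservedValue-style sentinel type; names are illustrative.
    #[derive(Debug, PartialEq, Eq, Clone, Copy)]
    struct Slot(u32);

    impl Slot {
        const RESERVED: u32 = u32::MAX;

        fn reserved_value() -> Self {
            Slot(Self::RESERVED)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == Self::RESERVED
        }
    }

    fn main() {
        let s = Slot(7);
        // For any well-behaved implementation the two checks agree.
        assert_eq!(s.is_reserved_value(), s == Slot::reserved_value());
    }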

/// Packed representation of `Option<T>`.

@ -25,12 +23,12 @@ pub struct PackedOption<T: ReservedValue>(T);
impl<T: ReservedValue> PackedOption<T> {
    /// Returns `true` if the packed option is a `None` value.
    pub fn is_none(&self) -> bool {
        self.0.is_reserved_value()
        self.0 == T::reserved_value()
    }

    /// Returns `true` if the packed option is a `Some` value.
    pub fn is_some(&self) -> bool {
        !self.0.is_reserved_value()
        self.0 != T::reserved_value()
    }

    /// Expand the packed option into a normal `Option`.

@ -77,7 +75,7 @@ impl<T: ReservedValue> From<T> for PackedOption<T> {
    /// Convert `t` into a packed `Some(x)`.
    fn from(t: T) -> Self {
        debug_assert!(
            !t.is_reserved_value(),
            t != T::reserved_value(),
            "Can't make a PackedOption from the reserved value."
        );
        Self(t)

@ -125,10 +123,6 @@ mod tests {
        fn reserved_value() -> Self {
            NoC(13)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == 13
        }
    }

    #[test]

@ -151,10 +145,6 @@ mod tests {
        fn reserved_value() -> Self {
            Ent(13)
        }

        fn is_reserved_value(&self) -> bool {
            self.0 == 13
        }
    }

    #[test]

@ -1 +1 @@
{"files":{"Cargo.toml":"084cc46ba2d09a2ee8085c37be8624b3cc249d381f1cbee6df468930ce15e415","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"d1d8477572f70cc28f71424af272d9eec0adf58af657ff153c4acbbb39822a50","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"d152c6553c0091b43d9ea0cd547dc49440e6321eb792bf47fdd3245aed046513","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"f750cc995c66635dab7f2b977266cf9235d984b585ab8145bdb858ea8e1b0fb4","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}